From f30da45690c7ed133d73de76f8790563d72979d7 Mon Sep 17 00:00:00 2001 From: PDillis Date: Mon, 28 Mar 2022 22:24:16 +0200 Subject: [PATCH] PR #80, #143 and #111, #116, #125 added for correctly building conda env in Windows; update README; add models in gen_utils.py; general code linting --- README.md | 174 +++++++++++++++++++++++++-------------- environment.yml | 5 +- style_mixing.py | 23 +++--- torch_utils/gen_utils.py | 19 +++-- 4 files changed, 140 insertions(+), 81 deletions(-) diff --git a/README.md b/README.md index 15ebb683..a97eb06e 100644 --- a/README.md +++ b/README.md @@ -8,62 +8,105 @@ of being backwards-compatible. As such, we can use our previously-trained models get acquainted with the official repository and its codebase, as we will be building upon it and as such, increase its capabilities (but hopefully not its complexity!). -This repository adds the following (not yet the complete list): +This repository adds/has the following changes (not yet the complete list): * Dataset tool * Add `--center-crop-tall`: add vertical black bars to the sides instead, in the same vein as the horizontal bars in `--center-crop-wide`. * Grayscale images in the dataset are converted to `RGB`. * If the dataset tool encounters an error, print it along the offending image, but continue with the rest of the dataset - ([pull #39](https://github.com/NVlabs/stylegan3/pull/39) from [Andreas Jansson](https://github.com/andreasjansson)). - * *TODO*: Add multi-crop, as used in [Earth View](https://github.com/PDillis/earthview#multi-crop---data_augmentpy). + ([PR #39](https://github.com/NVlabs/stylegan3/pull/39) from [Andreas Jansson](https://github.com/andreasjansson)). + * ***TODO:*** Add multi-crop, as used in [Earth View](https://github.com/PDillis/earthview#multi-crop---data_augmentpy). * Training - * `--mirrory`: Added vertical mirroring for doubling the dataset size - * `--gamma`: If no R1 regularization is provided, the heuristic formula will be used from [StyleGAN2](https://github.com/NVlabs/stylegan2). - * `--aug`: ***TODO*** add [Deceive-D/APA](https://github.com/EndlessSora/DeceiveD) as an option. - * `--augpipe`: Now available to use is [StyleGAN2-ADA's](https://github.com/NVlabs/stylegan2-ada-pytorch) full list of augpipe, e,g., `blit`, `geom`, `bgc`, `bgcfnc`, etc. - * `--img-snap`: When to save snapshot images, so now it's independent of when the model is saved; - * `--snap-res`: The resolution of the snapshots, depending on your screen resolution, or how many images you wish to see per tick. Available resolutions: `1080p`, `4k`, and `8k`. + * `--mirrory`: Added vertical mirroring for doubling the dataset size (quadrupling if `--mirror` is used; make sure your dataset has either or both + of these symmetries in order for it to make sense to use them) + * `--gamma`: If no R1 regularization is provided, the heuristic formula from [StyleGAN2](https://github.com/NVlabs/stylegan2) will be used. + * `--aug`: ***TODO:*** add [Deceive-D/APA](https://github.com/EndlessSora/DeceiveD) as an option. + * `--augpipe`: Now available to use is [StyleGAN2-ADA's](https://github.com/NVlabs/stylegan2-ada-pytorch) full list of augpipe, i.e., individual augmentations (`blit`, `geom`, `color`, `filter`, `noise`, `cutout`) or their combinations (`bg`, `bgc`, `bgcf`, `bgcfn`, `bgcfnc`). + * `--img-snap`: Set when to save snapshot images, so now it's independent of when the model is saved (e.g., save image snapshots more often to know how the model is training without saving the model itself, to save space). 
+ * `--snap-res`: The resolution of the snapshots, depending on how many images you wish to see per snapshot. Available resolutions: `1080p`, `4k`, and `8k`.
  * `--resume-kimg`: Starting number of `kimg`, useful when continuing training a previous run
- * `--outdir`: Automatically set as `training-runs`
+ * `--outdir`: Automatically set as `training-runs`, so no need to set beforehand (in general this is true throughout the repository)
  * `--metrics`: Now set by default to `None`, so there's no need to worry about this one
- * `--resume`: All available pre-trained models from NVIDIA can be found with a simple dictionary, depending on the `--cfg` used.
-   For example, if `--cfg=stylegan3-r`, then to transfer learn from FFHQU at 1024 resolution, set `--resume=ffhqu1024`.
-   ***TODO***: finish the following table, but full list available [here](https://github.com/PDillis/stylegan3-fun/blob/0bfa8e108487b50d6ecb73718c60497f063d8c17/train.py#L297).
-
-   | Available Models | ffhq256 | ffhqu256 | ffhq512 | ffhq1024 | ffhqu1024 |
-   | :--------------- | :-----: | :------: | :-----: | :------: | :-------: |
-   | stylegan2        | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: |
-   | stylegan3-t      |  | :heavy_check_mark: |  | :heavy_check_mark: | :heavy_check_mark: |
-   | stylegan3-r      |  | :heavy_check_mark: |  | :heavy_check_mark: | :heavy_check_mark: |
+ * `--resume`: All available pre-trained models from NVIDIA (and more) can be used with a simple dictionary, depending on the `--cfg` used.
+   For example, if you wish to use StyleGAN3's `config-r`, then set `--cfg=stylegan3-r`. In addition, if you wish to transfer learn from FFHQU at 1024 resolution, set `--resume=ffhqu1024`.
+ * The full list of currently available models to transfer learn from (or synthesize new images with) is the following (***TODO:*** add a small description of each model,
+   so the user can better know which to use for their particular use case, with proper citations to the original authors as well); a minimal loading sketch is shown after this feature list:
+
+ StyleGAN2 models + + 1. Majority, if not all, are `config-f`: set `--cfg=stylegan2` + * `ffhq256` + * `ffhqu256` + * `ffhq512` + * `ffhq1024` + * `ffhqu1024` + * `celebahq256` + * `lsundog256` + * `afhqcat512` + * `afhqdog512` + * `afhqwild512` + * `afhq512` + * `brecahad512` + * `cifar10` (conditional, 10 classes) + * `metfaces1024` + * `metfacesu1024` + * `lsuncar512` (config-f) + * `lsuncat256` (config-f) + * `lsunchurch256` (config-f) + * `lsunhorse256` (config-f) + * `minecraft1024` (thanks to @jeffheaton) + * `imagenet512` (thanks to @shawwn) + * `wikiart1024-C` (conditional, 167 classes; thanks to @pbaylies) + * `wikiart1024-U` (thanks to @pbaylies) + * `maps1024` (thanks to @tjukanov) + * `fursona512` (thanks to @arfafax) + * `mlpony512` (thanks to @arfafax) + * `afhqcat256` (Deceive-D/APA models) + * `anime256` (Deceive-D/APA models) + * `cub256` (Deceive-D/APA models) + * `sddogs1024` (Self-Distilled StyleGAN models) + * `sdelephant512` (Self-Distilled StyleGAN models) + * `sdhorses512` (Self-Distilled StyleGAN models) + * `sdbicycles256` (Self-Distilled StyleGAN models) + * `sdlions512` (Self-Distilled StyleGAN models) + * `sdgiraffes512` (Self-Distilled StyleGAN models) + * `sdparrots512` (Self-Distilled StyleGAN models) +
+ +
+ StyleGAN3 models + + 1. `config-t`: set `--cfg=stylegan3-t` + * `afhq512` + * `ffhqu256` + * `ffhq1024` + * `ffhqu1024` + * `metfaces1024` + * `metfacesu1024` + * `landscapes256` (thanks to @justinpinkney) + * `wikiart1024` (thanks to @justinpinkney) + * `mechfuture256` (thanks to @edstoica; 29 kimg tick) + * `vivflowers256` (thanks to @edstoica; 68 kimg tick) + * `alienglass256` (thanks to @edstoica; 38 kimg tick) + * `scificity256` (thanks to @edstoica; 210 kimg tick) + * `scifiship256` (thanks to @edstoica; 168 kimg tick) + 2. `config-r`: set `--cfg=stylegan3-r` + * `afhq512` + * `ffhq1024` + * `ffhqu1024` + * `ffhqu256` + * `metfaces1024` + * `metfacesu1024` +
+
+  * The main sources of these pretrained models are both the [official NVIDIA repository](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/research/models/stylegan3),
+    as well as other community repositories, such as [Justin Pinkney](https://github.com/justinpinkney)'s [Awesome Pretrained StyleGAN2](https://github.com/justinpinkney/awesome-pretrained-stylegan2)
+    and [Awesome Pretrained StyleGAN3](https://github.com/justinpinkney/awesome-pretrained-stylegan3), [Deceive-D/APA](https://github.com/EndlessSora/DeceiveD),
+    [Self-Distilled StyleGAN/Internet Photos](https://github.com/self-distilled-stylegan/self-distilled-internet-photos), and [edstoica](https://github.com/edstoica)'s
+    [Wombo Dream](https://www.wombo.art/)-based [models](https://github.com/edstoica/lucid_stylegan3_datasets_models). Others can be found around the net and are properly credited in this repository,
+    so long as they can be easily downloaded with [`dnnlib.util.open_url`](https://github.com/PDillis/stylegan3-fun/blob/4ce9d6f7601641ba1e2906ed97f2739a63fb96e2/dnnlib/util.py#L396).
 * Interpolation videos
   * [Random interpolation](https://youtu.be/DNfocO1IOUE)
@@ -71,6 +114,7 @@ This repository adds the following (not yet the complete list):
   * Sightseeding
   * [Circular interpolation](https://youtu.be/4nktYGjSVHg)
   * [Visual-reactive interpolation](https://youtu.be/KoEAkPnE-zA) (Beta)
+  * Audiovisual-reactive interpolation (TODO)
 * Projection into the latent space
   * [Project into W+](https://arxiv.org/abs/1904.03189)
   * Additional losses to use for better projection (e.g., using VGG16 or [CLIP](https://github.com/openai/CLIP))
@@ -79,12 +123,16 @@ This repository adds the following (not yet the complete list):
   * Start from a random image (`random` or `perlin`, using [Mathieu Duchesneau's implementation](https://github.com/duchesneaumathieu/pyperlin)) or from an existing one
 * Expansion on GUI/`visualizer.py`
   * Added the rest of the affine transformations
+  * Added a widget for class-conditional models (***TODO:*** mix classes with continuous values for `cls`!)
 * General model and code additions
-  * No longer necessary to specify `--outdir` when running the code, as the output directory will be automatically generated
-  * [Better sampling?](https://arxiv.org/abs/2110.08009) (TODO)
-  * StyleGAN3: anchor the latent space for easier to follow interpolations
+  * ***TODO:*** [Better sampling?](https://arxiv.org/abs/2110.08009)
+  * [Multi-modal truncation trick](https://arxiv.org/abs/2202.12211): find the different clusters in your model and use the one closest to your dlatent in order to increase fidelity (***TODO:*** finish the skeleton implementation)
+  * StyleGAN3: anchor the latent space for easier-to-follow interpolations (thanks to [Rivers Have Wings](https://github.com/crowsonkb) and [nshepperd](https://github.com/nshepperd)).
+  * Use CPU instead of GPU if desired (not recommended, but perfectly fine for generating images whenever the custom CUDA kernels fail to compile).
+  * Add missing dependencies and channels so that the [`conda`](https://docs.conda.io/en/latest/) environment is correctly set up in Windows
+    (PRs [#111](https://github.com/NVlabs/stylegan3/pull/111)/[#116](https://github.com/NVlabs/stylegan3/pull/116)/[#125](https://github.com/NVlabs/stylegan3/pull/125) and [#80](https://github.com/NVlabs/stylegan3/pull/80)/[#143](https://github.com/NVlabs/stylegan3/pull/143) from the base repository, respectively)
-***TODO:*** Finish documentation for better user experience, add videos/images, code samples.
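As a quick illustration of the `--resume`/pretrained-model shorthands described in the feature list above, here is a minimal loading sketch. It is not part of the patch: it only relies on `dnnlib.util.open_url` and the standard `pickle` module, and it uses the AFHQv2 URL already shown later in this README as a stand-in for whatever a shorthand such as `ffhqu1024` resolves to via the dictionary added in `torch_utils/gen_utils.py`.

```python
# Minimal sketch: load a pre-trained generator directly from a URL.
# Assumes the repository root is on the Python path (so dnnlib/torch_utils are importable).
import pickle

import dnnlib

url = 'https://api.ngc.nvidia.com/v2/models/nvidia/research/stylegan3/versions/1/files/stylegan3-r-afhqv2-512x512.pkl'
with dnnlib.util.open_url(url) as f:
    # For older TF-format pickles, the repo's legacy.load_network_pkl(f) would typically be used instead.
    G = pickle.load(f)['G_ema']  # generator with EMA weights, a torch.nn.Module

print(G.z_dim, G.img_resolution)  # e.g. 512, 512 for the AFHQv2 model
```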
+***TODO:*** Finish documentation for better user experience, add videos/images, code samples, visuals... --- @@ -161,7 +209,7 @@ See [Troubleshooting](./docs/troubleshooting.md) for help on common installation Pre-trained networks are stored as `*.pkl` files that can be referenced using local filenames or URLs: -```.bash +```bash # Generate an image using pre-trained AFHQv2 model ("Ours" in Figure 1, left). python gen_images.py --outdir=out --trunc=1 --seeds=2 \ --network=https://api.ngc.nvidia.com/v2/models/nvidia/research/stylegan3/versions/1/files/stylegan3-r-afhqv2-512x512.pkl @@ -175,7 +223,7 @@ Outputs from the above commands are placed under `out/*.png`, controlled by `--o **Docker**: You can run the above curated image example using Docker as follows: -```.bash +```bash # Build the stylegan3:latest image docker build --tag stylegan3 . @@ -199,7 +247,7 @@ The `docker run` invocation may look daunting, so let's unpack its contents here This release contains an interactive model visualization tool that can be used to explore various characteristics of a trained model. To start it, run: -```.bash +```bash python visualizer.py ``` @@ -209,7 +257,7 @@ python visualizer.py You can use pre-trained networks in your own Python code as follows: -```.python +```python with open('ffhq.pkl', 'rb') as f: G = pickle.load(f)['G_ema'].cuda() # torch.nn.Module z = torch.randn([1, G.z_dim]).cuda() # latent codes @@ -223,7 +271,7 @@ The pickle contains three networks. `'G'` and `'D'` are instantaneous snapshots The generator consists of two submodules, `G.mapping` and `G.synthesis`, that can be executed separately. They also support various additional options: -```.python +```python w = G.mapping(z, c, truncation_psi=0.5, truncation_cutoff=8) img = G.synthesis(w, noise_mode='const', force_fp32=True) ``` @@ -236,7 +284,7 @@ Datasets are stored as uncompressed ZIP archives containing uncompressed PNG fil **FFHQ**: Download the [Flickr-Faces-HQ dataset](https://github.com/NVlabs/ffhq-dataset) as 1024x1024 images and create a zip archive using `dataset_tool.py`: -```.bash +```bash # Original 1024x1024 resolution. python dataset_tool.py --source=/tmp/images1024x1024 --dest=~/datasets/ffhq-1024x1024.zip @@ -249,7 +297,7 @@ See the [FFHQ README](https://github.com/NVlabs/ffhq-dataset) for information on **MetFaces**: Download the [MetFaces dataset](https://github.com/NVlabs/metfaces-dataset) and create a ZIP archive: -```.bash +```bash python dataset_tool.py --source=~/downloads/metfaces/images --dest=~/datasets/metfaces-1024x1024.zip ``` @@ -257,13 +305,13 @@ See the [MetFaces README](https://github.com/NVlabs/metfaces-dataset) for inform **AFHQv2**: Download the [AFHQv2 dataset](https://github.com/clovaai/stargan-v2/blob/master/README.md#animal-faces-hq-dataset-afhq) and create a ZIP archive: -```.bash +```bash python dataset_tool.py --source=~/downloads/afhqv2 --dest=~/datasets/afhqv2-512x512.zip ``` Note that the above command creates a single combined dataset using all images of all three classes (cats, dogs, and wild animals), matching the setup used in the StyleGAN3 paper. 
Alternatively, you can also create a separate dataset for each class: -```.bash +```bash python dataset_tool.py --source=~/downloads/afhqv2/train/cat --dest=~/datasets/afhqv2cat-512x512.zip python dataset_tool.py --source=~/downloads/afhqv2/train/dog --dest=~/datasets/afhqv2dog-512x512.zip python dataset_tool.py --source=~/downloads/afhqv2/train/wild --dest=~/datasets/afhqv2wild-512x512.zip @@ -273,7 +321,7 @@ python dataset_tool.py --source=~/downloads/afhqv2/train/wild --dest=~/datasets/ You can train new networks using `train.py`. For example: -```.bash +```bash # Train StyleGAN3-T for AFHQv2 using 8 GPUs. python train.py --outdir=~/training-runs --cfg=stylegan3-t --data=~/datasets/afhqv2-512x512.zip \ --gpus=8 --batch=32 --gamma=8.2 --mirror=1 @@ -298,7 +346,7 @@ By default, `train.py` automatically computes FID for each network pickle export Additional quality metrics can also be computed after the training: -```.bash +```bash # Previous training run: look up options automatically, save result to JSONL file. python calc_metrics.py --metrics=eqt50k_int,eqr50k \ --network=~/training-runs/00000-stylegan3-r-mydataset/network-snapshot-000000.pkl @@ -339,7 +387,7 @@ References: The easiest way to inspect the spectral properties of a given generator is to use the built-in FFT mode in `visualizer.py`. In addition, you can visualize average 2D power spectra (Appendix A, Figure 15) as follows: -```.bash +```bash # Calculate dataset mean and std, needed in subsequent steps. python avg_spectra.py stats --source=~/datasets/ffhq-1024x1024.zip diff --git a/environment.yml b/environment.yml index 578a58a0..6f1a40b1 100644 --- a/environment.yml +++ b/environment.yml @@ -2,6 +2,7 @@ name: stylegan3 channels: - pytorch - nvidia + - conda-forge # PR #80 by @SetZero / #143 by @coldwaterq dependencies: - python >= 3.8 - pip @@ -10,7 +11,7 @@ dependencies: - pillow=8.3.1 - scipy=1.7.1 - pytorch=1.9.1 - - cudatoolkit=11.1 + - cudatoolkit>=11.1 # PR #116 by @edstoica - requests=2.26.0 - tqdm=4.62.2 - ninja=1.10.2 @@ -22,3 +23,5 @@ dependencies: - pyopengl==3.1.5 - imageio-ffmpeg==0.4.3 - pyspng + - psutil # PR #125 by @fastflair / #111 by @siddharthksah + - tensorboard # PR #125 by @fastflair \ No newline at end of file diff --git a/style_mixing.py b/style_mixing.py index fa27d593..d34ea0d1 100644 --- a/style_mixing.py +++ b/style_mixing.py @@ -48,23 +48,22 @@ def style_names(max_style: int, file_name: str, desc: str, col_styles: List[int] to both the file name and the new directory to be created. 
""" if list(range(0, 4)) == col_styles: - file_name = f'{file_name}-coarse_styles' - desc = f'{desc}-coarse_styles' + styles = 'coarse_styles' elif list(range(4, 8)) == col_styles: - file_name = f'{file_name}-middle_styles' - desc = f'{desc}-middle_styles' + styles = 'middle_styles' elif list(range(8, max_style)) == col_styles: - file_name = f'{file_name}-fine_styles' - desc = f'{desc}-fine_styles' + styles = 'fine_styles' elif list(range(0, 8)) == col_styles: - file_name = f'{file_name}-coarse+middle_styles' - desc = f'{desc}-coarse+middle_styles' + styles = 'coarse+middle_styles' elif list(range(4, max_style)) == col_styles: - file_name = f'{file_name}-middle+fine_styles' - desc = f'{desc}-middle+fine_styles' + styles = 'middle+fine_styles' elif list(range(0, 4)) + list(range(8, max_style)) == col_styles: - file_name = f'{file_name}-coarse+fine_styles' - desc = f'{desc}-coarse+fine_styles' + styles = 'coarse+fine_styles' + else: + styles = 'custom_styles' + + file_name = f'{file_name}-{styles}' + desc = f'{desc}-{styles}' return file_name, desc diff --git a/torch_utils/gen_utils.py b/torch_utils/gen_utils.py index cb3052e1..c391838a 100644 --- a/torch_utils/gen_utils.py +++ b/torch_utils/gen_utils.py @@ -386,6 +386,15 @@ def force_fp32(G) -> None: 'afhqcat256': 'https://drive.google.com/u/0/uc?export=download&confirm=zFoN&id=1P9ouHIK-W8JTb6bvecfBe4c_3w6gmMJK', 'anime256': 'https://drive.google.com/u/0/uc?export=download&confirm=6Uie&id=1EWOdieqELYmd2xRxUR4gnx7G10YI5dyP', 'cub256': 'https://drive.google.com/u/0/uc?export=download&confirm=KwZS&id=1J0qactT55ofAvzddDE_xnJEY8s3vbo1_', + # Self-Distilled StyleGAN (full body representation of each class): https://github.com/self-distilled-stylegan/self-distilled-internet-photos + 'sddogs1024': 'https://storage.googleapis.com/self-distilled-stylegan/dogs_1024_pytorch.pkl', + 'sdelephant512': 'https://storage.googleapis.com/self-distilled-stylegan/elephants_512_pytorch.pkl', + 'sdhorses512': 'https://storage.googleapis.com/self-distilled-stylegan/horses_256_pytorch.pkl', + 'sdbicycles256': 'https://storage.googleapis.com/self-distilled-stylegan/bicycles_256_pytorch.pkl', + 'sdlions512': 'https://storage.googleapis.com/self-distilled-stylegan/lions_512_pytorch.pkl', + 'sdgiraffes512': 'https://storage.googleapis.com/self-distilled-stylegan/giraffes_512_pytorch.pkl', + 'sdparrots512': 'https://storage.googleapis.com/self-distilled-stylegan/parrots_512_pytorch.pkl' + }, # For StyleGAN3 config-r models (--cfg=stylegan3-r) 'stylegan3-r': { @@ -410,11 +419,11 @@ def force_fp32(G) -> None: 'landscapes256': 'https://drive.google.com/u/0/uc?export=download&confirm=eJHe&id=14UGDDOusZ9TMb-pOrF0PAjMGVWLSAii1', # Thanks to @justinpinkney 'wikiart1024': 'https://drive.google.com/u/0/uc?export=download&confirm=2tz5&id=18MOpwTMJsl_Z17q-wQVnaRLCUFZYSNkj', # Thanks to @justinpinkney # -> Wombo Dream-based models found in: https://github.com/edstoica/lucid_stylegan3_datasets_models by @edstoica; TODO: more to come, update the list as they are released! 
- 'mechfuture256': 'https://www.dropbox.com/s/v2oie53cz62ozvu/network-snapshot-000029.pkl', # Thanks to @edstoica; 29kimg tick - 'vivflowers256': 'https://www.dropbox.com/s/o33lhgnk91hstvx/network-snapshot-000069.pkl', # Thanks to @edstoica; 68kimg tick - 'alienglass256': 'https://www.dropbox.com/s/gur14k0e7kspguy/network-snapshot-000038.pkl', # Thanks to @edstoica; 38kimg tick - 'scificity256': 'https://www.dropbox.com/s/1kfsmlct4mriphc/network-snapshot-000210.pkl', # Thanks to @edstoica; 210kimg tick - 'scifiship256': 'https://www.dropbox.com/s/02br3mjkma1hubc/network-snapshot-000162.pkl', # Thanks to @edstoica; 168kimg tick + 'mechfuture256': 'https://www.dropbox.com/s/v2oie53cz62ozvu/network-snapshot-000029.pkl?dl=1', # Thanks to @edstoica; 29kimg tick + 'vivflowers256': 'https://www.dropbox.com/s/o33lhgnk91hstvx/network-snapshot-000069.pkl?dl=1', # Thanks to @edstoica; 68kimg tick + 'alienglass256': 'https://www.dropbox.com/s/gur14k0e7kspguy/network-snapshot-000038.pkl?dl=1', # Thanks to @edstoica; 38kimg tick + 'scificity256': 'https://www.dropbox.com/s/1kfsmlct4mriphc/network-snapshot-000210.pkl?dl=1', # Thanks to @edstoica; 210kimg tick + 'scifiship256': 'https://www.dropbox.com/s/02br3mjkma1hubc/network-snapshot-000162.pkl?dl=1', # Thanks to @edstoica; 168kimg tick } }
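The `?dl=1` query parameter appended to the Dropbox links above asks Dropbox for a direct file download instead of the HTML preview page, which is what `dnnlib.util.open_url`/`pickle` need. A small, hypothetical sanity check (not part of the patch) using the `requests` package already listed in `environment.yml`:

```python
# Hypothetical sanity check: confirm a Dropbox '?dl=1' link serves raw bytes, not an HTML preview.
import requests

url = 'https://www.dropbox.com/s/v2oie53cz62ozvu/network-snapshot-000029.pkl?dl=1'  # 'mechfuture256'
resp = requests.get(url, stream=True, timeout=60)  # stream=True: inspect headers without downloading the body
resp.raise_for_status()
content_type = resp.headers.get('Content-Type', '')
# Expected to be a binary type (e.g. 'application/octet-stream'); without '?dl=1' Dropbox
# typically answers with 'text/html' (the preview page), which breaks pickle loading.
assert not content_type.startswith('text/html'), f'Unexpected Content-Type: {content_type}'
resp.close()
```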