From 5e9fa33f3d15e998176b9bbffe27bdf1d64cb3e8 Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Fri, 6 Dec 2024 10:20:20 -0500
Subject: [PATCH] reduce test concurrency to avoid HF rate limiting, test
 suite parity (#2128)

* reduce test concurrency to avoid HF rate limiting, test suite parity
* make val_set_size smaller to speed up e2e tests
* more retries for pytest fixture downloads
* val_set_size was too small
* move retry_on_request_exceptions to data utils and add retry strategy
* pre-download ultrafeedback as a test fixture
* refactor download retry into its own fn
* don't import from data utils
* use retry mechanism now for fixtures
---
 .github/workflows/tests-nightly.yml            |  7 +++
 .github/workflows/tests.yml                    |  9 +++
 src/axolotl/utils/data/sft.py                  | 29 ++-------
 src/axolotl/utils/data/utils.py                | 46 +++++++++++++-
 tests/conftest.py                              | 60 +++++++++++++++----
 tests/e2e/patched/test_4d_multipack_llama.py   |  4 +-
 tests/e2e/patched/test_falcon_samplepack.py    |  4 +-
 tests/e2e/patched/test_fused_llama.py          |  2 +-
 tests/e2e/patched/test_lora_llama_multipack.py |  2 +-
 tests/e2e/patched/test_mistral_samplepack.py   |  4 +-
 tests/e2e/patched/test_mixtral_samplepack.py   |  4 +-
 tests/e2e/patched/test_phi_multipack.py        |  2 +-
 12 files changed, 126 insertions(+), 47 deletions(-)

diff --git a/.github/workflows/tests-nightly.yml b/.github/workflows/tests-nightly.yml
index 8b7561bd13..f3e5530cb8 100644
--- a/.github/workflows/tests-nightly.yml
+++ b/.github/workflows/tests-nightly.yml
@@ -23,9 +23,15 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       fail-fast: false
+      max-parallel: 2
       matrix:
         python_version: ["3.10", "3.11"]
         pytorch_version: ["2.3.1", "2.4.1", "2.5.1"]
+        exclude:
+          - python_version: "3.10"
+            pytorch_version: "2.4.1"
+          - python_version: "3.10"
+            pytorch_version: "2.5.1"
     timeout-minutes: 20

     steps:
@@ -55,6 +61,7 @@
           pip3 install --upgrade pip
           pip3 install --upgrade packaging
           pip3 install -U -e .
+          python scripts/unsloth_install.py | sh
           python scripts/cutcrossentropy_install.py | sh
           pip3 install -r requirements-dev.txt -r requirements-tests.txt

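The two workflow edits above work together: max-parallel caps how many matrix jobs run at once (so fewer runners hit the Hugging Face Hub simultaneously), while the exclude entries prune the 2x3 python/pytorch matrix down to four jobs. As a rough illustration (not part of the patch), the surviving matrix can be enumerated like this:

from itertools import product

python_versions = ["3.10", "3.11"]
pytorch_versions = ["2.3.1", "2.4.1", "2.5.1"]
excluded = {("3.10", "2.4.1"), ("3.10", "2.5.1")}

# GitHub expands the matrix to the full cross product, then drops excludes;
# with max-parallel: 2, at most two of the survivors run concurrently.
jobs = [c for c in product(python_versions, pytorch_versions) if c not in excluded]
print(jobs)  # 4 jobs instead of 6
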
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 690047bb14..3e1a4fe924 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -45,9 +45,15 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       fail-fast: false
+      max-parallel: 2
       matrix:
         python_version: ["3.10", "3.11"]
         pytorch_version: ["2.3.1", "2.4.1", "2.5.1"]
+        exclude:
+          - python_version: "3.10"
+            pytorch_version: "2.4.1"
+          - python_version: "3.10"
+            pytorch_version: "2.5.1"
     timeout-minutes: 20

     steps:
@@ -95,6 +101,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       fail-fast: false
+      max-parallel: 1
       matrix:
         python_version: ["3.11"]
         pytorch_version: ["2.4.1", "2.5.1"]
@@ -124,6 +131,8 @@
          pip3 show torch
          python3 setup.py sdist
          pip3 install dist/axolotl*.tar.gz
+         python scripts/unsloth_install.py | sh
+         python scripts/cutcrossentropy_install.py | sh
          pip3 install -r requirements-dev.txt -r requirements-tests.txt

     - name: Ensure axolotl CLI was installed
diff --git a/src/axolotl/utils/data/sft.py b/src/axolotl/utils/data/sft.py
index 4ed16e3582..f56fe8f38c 100644
--- a/src/axolotl/utils/data/sft.py
+++ b/src/axolotl/utils/data/sft.py
@@ -2,11 +2,9 @@

 import functools
 import logging
-import time
 from pathlib import Path
 from typing import List, Optional, Tuple, Union

-import requests
 from datasets import (
     Dataset,
     DatasetDict,
@@ -44,7 +42,11 @@
     UnsupportedPrompter,
 )
 from axolotl.utils.data.pretraining import wrap_pretraining_dataset
-from axolotl.utils.data.utils import deduplicate_and_log_datasets, md5
+from axolotl.utils.data.utils import (
+    deduplicate_and_log_datasets,
+    md5,
+    retry_on_request_exceptions,
+)
 from axolotl.utils.dict import DictDefault
 from axolotl.utils.distributed import is_local_main_process, zero_first
 from axolotl.utils.trainer import (
@@ -55,27 +57,6 @@
 LOG = logging.getLogger("axolotl")


-def retry_on_request_exceptions(max_retries=3, delay=1):
-    def decorator(func):
-        @functools.wraps(func)
-        def wrapper(*args, **kwargs):  # pylint: disable=inconsistent-return-statements
-            for attempt in range(max_retries):
-                try:
-                    return func(*args, **kwargs)
-                except (
-                    requests.exceptions.ReadTimeout,
-                    requests.exceptions.ConnectionError,
-                ) as exc:
-                    if attempt < max_retries - 1:
-                        time.sleep(delay)
-                    else:
-                        raise exc
-
-        return wrapper
-
-    return decorator
-
-
 @retry_on_request_exceptions(max_retries=3, delay=5)
 def prepare_dataset(cfg, tokenizer, processor=None):
     prompters = []
diff --git a/src/axolotl/utils/data/utils.py b/src/axolotl/utils/data/utils.py
index 56bcddd8eb..657cbb77c3 100644
--- a/src/axolotl/utils/data/utils.py
+++ b/src/axolotl/utils/data/utils.py
@@ -1,13 +1,57 @@
 """data handling helpers"""
-
+import functools
 import hashlib
 import logging
+import time
+from enum import Enum

+import huggingface_hub
+import requests
 from datasets import Dataset

 LOG = logging.getLogger("axolotl")


+class RetryStrategy(Enum):
+    """
+    Enum for retry strategies.
+    """
+
+    CONSTANT = 1
+    LINEAR = 2
+    EXPONENTIAL = 3
+
+
+def retry_on_request_exceptions(
+    max_retries=3, delay=1, retry_strategy: RetryStrategy = RetryStrategy.LINEAR
+):
+    def decorator(func):
+        @functools.wraps(func)
+        def wrapper(*args, **kwargs):  # pylint: disable=inconsistent-return-statements
+            for attempt in range(max_retries):
+                try:
+                    return func(*args, **kwargs)
+                except (
+                    requests.exceptions.ReadTimeout,
+                    requests.exceptions.ConnectionError,
+                    huggingface_hub.errors.HfHubHTTPError,
+                ) as exc:
+                    if attempt < max_retries - 1:
+                        if retry_strategy == RetryStrategy.EXPONENTIAL:
+                            step_delay = delay * 2**attempt
+                        elif retry_strategy == RetryStrategy.LINEAR:
+                            step_delay = delay * (attempt + 1)
+                        else:
+                            step_delay = delay  # Use constant delay.
+                        time.sleep(step_delay)
+                    else:
+                        raise exc
+
+        return wrapper
+
+    return decorator
+
+
 def md5(to_hash: str, encoding: str = "utf-8") -> str:
     try:
         return hashlib.md5(to_hash.encode(encoding), usedforsecurity=False).hexdigest()
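The decorator that moved into axolotl.utils.data.utils now takes a RetryStrategy, so callers choose how the sleep grows between attempts: constant (delay every time), linear (delay, 2*delay, 3*delay, ...), or exponential (delay, 2*delay, 4*delay, ...). A minimal usage sketch follows; fetch_dataset_index and its URL handling are hypothetical, but the decorator, enum, and import path are the ones added above:

import requests

from axolotl.utils.data.utils import RetryStrategy, retry_on_request_exceptions


# With delay=2 and EXPONENTIAL, the sleeps between the five attempts are
# 2s, 4s, 8s, 16s; LINEAR would give 2s, 4s, 6s, 8s. Only ReadTimeout,
# ConnectionError, and HfHubHTTPError trigger a retry; anything else
# propagates immediately.
@retry_on_request_exceptions(
    max_retries=5, delay=2, retry_strategy=RetryStrategy.EXPONENTIAL
)
def fetch_dataset_index(url: str) -> bytes:
    return requests.get(url, timeout=10).content
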
+ """ + + CONSTANT = 1 + LINEAR = 2 + EXPONENTIAL = 3 + + +def retry_on_request_exceptions( + max_retries=3, delay=1, retry_strategy: RetryStrategy = RetryStrategy.LINEAR +): + def decorator(func): + @functools.wraps(func) + def wrapper(*args, **kwargs): # pylint: disable=inconsistent-return-statements + for attempt in range(max_retries): + try: + return func(*args, **kwargs) + except ( + requests.exceptions.ReadTimeout, + requests.exceptions.ConnectionError, + huggingface_hub.errors.HfHubHTTPError, + ) as exc: + if attempt < max_retries - 1: + if retry_strategy == RetryStrategy.EXPONENTIAL: + step_delay = delay * 2**attempt + elif retry_strategy == RetryStrategy.LINEAR: + step_delay = delay * (attempt + 1) + else: + step_delay = delay # Use constant delay. + time.sleep(step_delay) + else: + raise exc + + return wrapper + + return decorator + + def md5(to_hash: str, encoding: str = "utf-8") -> str: try: return hashlib.md5(to_hash.encode(encoding), usedforsecurity=False).hexdigest() diff --git a/tests/conftest.py b/tests/conftest.py index 3a20bbfdd9..a775216fc0 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,47 +1,77 @@ """ shared pytest fixtures """ +import functools import shutil import tempfile +import time import pytest +import requests from huggingface_hub import snapshot_download +def retry_on_request_exceptions(max_retries=3, delay=1): + # pylint: disable=duplicate-code + def decorator(func): + @functools.wraps(func) + def wrapper(*args, **kwargs): # pylint: disable=inconsistent-return-statements + for attempt in range(max_retries): + try: + return func(*args, **kwargs) + except ( + requests.exceptions.ReadTimeout, + requests.exceptions.ConnectionError, + ) as exc: + if attempt < max_retries - 1: + time.sleep(delay) + else: + raise exc + + return wrapper + + return decorator + + +@retry_on_request_exceptions(max_retries=3, delay=5) +def snapshot_download_w_retry(*args, **kwargs): + return snapshot_download(*args, **kwargs) + + @pytest.fixture(scope="session", autouse=True) def download_smollm2_135m_model(): # download the model - snapshot_download("HuggingFaceTB/SmolLM2-135M") + snapshot_download_w_retry("HuggingFaceTB/SmolLM2-135M") @pytest.fixture(scope="session", autouse=True) def download_llama_68m_random_model(): # download the model - snapshot_download("JackFram/llama-68m") + snapshot_download_w_retry("JackFram/llama-68m") @pytest.fixture(scope="session", autouse=True) def download_qwen_2_5_half_billion_model(): # download the model - snapshot_download("Qwen/Qwen2.5-0.5B") + snapshot_download_w_retry("Qwen/Qwen2.5-0.5B") @pytest.fixture(scope="session", autouse=True) def download_tatsu_lab_alpaca_dataset(): # download the dataset - snapshot_download("tatsu-lab/alpaca", repo_type="dataset") + snapshot_download_w_retry("tatsu-lab/alpaca", repo_type="dataset") @pytest.fixture(scope="session", autouse=True) def download_mhenrichsen_alpaca_2k_dataset(): # download the dataset - snapshot_download("mhenrichsen/alpaca_2k_test", repo_type="dataset") + snapshot_download_w_retry("mhenrichsen/alpaca_2k_test", repo_type="dataset") @pytest.fixture(scope="session", autouse=True) def download_mhenrichsen_alpaca_2k_w_revision_dataset(): # download the dataset - snapshot_download( + snapshot_download_w_retry( "mhenrichsen/alpaca_2k_test", repo_type="dataset", revision="d05c1cb" ) @@ -49,21 +79,29 @@ def download_mhenrichsen_alpaca_2k_w_revision_dataset(): @pytest.fixture(scope="session", autouse=True) def download_mlabonne_finetome_100k_dataset(): # download the dataset - 
snapshot_download("mlabonne/FineTome-100k", repo_type="dataset") + snapshot_download_w_retry("mlabonne/FineTome-100k", repo_type="dataset") -@pytest.fixture +@pytest.fixture(scope="session", autouse=True) def download_argilla_distilabel_capybara_dpo_7k_binarized_dataset(): # download the dataset - snapshot_download( + snapshot_download_w_retry( "argilla/distilabel-capybara-dpo-7k-binarized", repo_type="dataset" ) -@pytest.fixture +@pytest.fixture(scope="session", autouse=True) +def download_argilla_ultrafeedback_binarized_preferences_cleaned_dataset(): + # download the dataset + snapshot_download_w_retry( + "argilla/ultrafeedback-binarized-preferences-cleaned", repo_type="dataset" + ) + + +@pytest.fixture(scope="session", autouse=True) def download_arcee_ai_distilabel_intel_orca_dpo_pairs_dataset(): # download the dataset - snapshot_download( + snapshot_download_w_retry( "arcee-ai/distilabel-intel-orca-dpo-pairs-binarized", repo_type="dataset" ) diff --git a/tests/e2e/patched/test_4d_multipack_llama.py b/tests/e2e/patched/test_4d_multipack_llama.py index a26c5d9620..b0ada92304 100644 --- a/tests/e2e/patched/test_4d_multipack_llama.py +++ b/tests/e2e/patched/test_4d_multipack_llama.py @@ -42,7 +42,7 @@ def test_sdp_lora_packing(self, temp_dir): "lora_dropout": 0.05, "lora_target_linear": True, "sequence_len": 1024, - "val_set_size": 0.1, + "val_set_size": 0.02, "datasets": [ { "path": "mhenrichsen/alpaca_2k_test", @@ -86,7 +86,7 @@ def test_torch_lora_packing(self, temp_dir): "lora_alpha": 16, "lora_dropout": 0.05, "lora_target_linear": True, - "val_set_size": 0.1, + "val_set_size": 0.02, "datasets": [ { "path": "mhenrichsen/alpaca_2k_test", diff --git a/tests/e2e/patched/test_falcon_samplepack.py b/tests/e2e/patched/test_falcon_samplepack.py index ae6a497391..d9d7151032 100644 --- a/tests/e2e/patched/test_falcon_samplepack.py +++ b/tests/e2e/patched/test_falcon_samplepack.py @@ -40,7 +40,7 @@ def test_qlora(self, temp_dir): "lora_dropout": 0.1, "lora_target_linear": True, "lora_modules_to_save": ["word_embeddings", "lm_head"], - "val_set_size": 0.1, + "val_set_size": 0.05, "special_tokens": { "bos_token": "<|endoftext|>", "pad_token": "<|endoftext|>", @@ -80,7 +80,7 @@ def test_ft(self, temp_dir): "flash_attention": True, "sample_packing": True, "sequence_len": 2048, - "val_set_size": 0.1, + "val_set_size": 0.05, "special_tokens": { "bos_token": "<|endoftext|>", "pad_token": "<|endoftext|>", diff --git a/tests/e2e/patched/test_fused_llama.py b/tests/e2e/patched/test_fused_llama.py index de1195c368..e662e340b6 100644 --- a/tests/e2e/patched/test_fused_llama.py +++ b/tests/e2e/patched/test_fused_llama.py @@ -38,7 +38,7 @@ def test_fft_packing(self, temp_dir): "flash_attn_fuse_mlp": True, "sample_packing": True, "sequence_len": 1024, - "val_set_size": 0.1, + "val_set_size": 0.02, "special_tokens": { "unk_token": "", "bos_token": "", diff --git a/tests/e2e/patched/test_lora_llama_multipack.py b/tests/e2e/patched/test_lora_llama_multipack.py index 5dbf146542..be2f133fb0 100644 --- a/tests/e2e/patched/test_lora_llama_multipack.py +++ b/tests/e2e/patched/test_lora_llama_multipack.py @@ -98,7 +98,7 @@ def test_lora_gptq_packed(self, temp_dir): "lora_alpha": 64, "lora_dropout": 0.05, "lora_target_linear": True, - "val_set_size": 0.1, + "val_set_size": 0.02, "special_tokens": { "unk_token": "", "bos_token": "", diff --git a/tests/e2e/patched/test_mistral_samplepack.py b/tests/e2e/patched/test_mistral_samplepack.py index a56c530b21..6685fb9d57 100644 --- a/tests/e2e/patched/test_mistral_samplepack.py 
diff --git a/tests/e2e/patched/test_mistral_samplepack.py b/tests/e2e/patched/test_mistral_samplepack.py
index a56c530b21..6685fb9d57 100644
--- a/tests/e2e/patched/test_mistral_samplepack.py
+++ b/tests/e2e/patched/test_mistral_samplepack.py
@@ -39,7 +39,7 @@ def test_lora_packing(self, temp_dir):
             "lora_alpha": 64,
             "lora_dropout": 0.05,
             "lora_target_linear": True,
-            "val_set_size": 0.1,
+            "val_set_size": 0.05,
             "special_tokens": {
                 "unk_token": "<unk>",
                 "bos_token": "<s>",
@@ -80,7 +80,7 @@ def test_ft_packing(self, temp_dir):
             "flash_attention": True,
             "sample_packing": True,
             "sequence_len": 1024,
-            "val_set_size": 0.1,
+            "val_set_size": 0.05,
             "special_tokens": {
                 "unk_token": "<unk>",
                 "bos_token": "<s>",
diff --git a/tests/e2e/patched/test_mixtral_samplepack.py b/tests/e2e/patched/test_mixtral_samplepack.py
index 8baba03073..684baaaff8 100644
--- a/tests/e2e/patched/test_mixtral_samplepack.py
+++ b/tests/e2e/patched/test_mixtral_samplepack.py
@@ -40,7 +40,7 @@ def test_qlora(self, temp_dir):
             "lora_alpha": 32,
             "lora_dropout": 0.1,
             "lora_target_linear": True,
-            "val_set_size": 0.1,
+            "val_set_size": 0.05,
             "special_tokens": {},
             "datasets": [
                 {
@@ -78,7 +78,7 @@ def test_ft(self, temp_dir):
             "flash_attention": True,
             "sample_packing": True,
             "sequence_len": 2048,
-            "val_set_size": 0.1,
+            "val_set_size": 0.05,
             "special_tokens": {},
             "datasets": [
                 {
diff --git a/tests/e2e/patched/test_phi_multipack.py b/tests/e2e/patched/test_phi_multipack.py
index 5f30453c18..7b5bf92dfa 100644
--- a/tests/e2e/patched/test_phi_multipack.py
+++ b/tests/e2e/patched/test_phi_multipack.py
@@ -38,7 +38,7 @@ def test_ft_packed(self, temp_dir):
             "pad_to_sequence_len": True,
             "load_in_8bit": False,
             "adapter": None,
-            "val_set_size": 0.1,
+            "val_set_size": 0.05,
             "special_tokens": {
                 "pad_token": "<|endoftext|>",
             },
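Across the e2e tests, val_set_size drops from 0.1 to 0.05 or 0.02. When this value is below 1.0, axolotl treats it as the fraction of the dataset held out for evaluation, so on the roughly 2,000-row test datasets the eval split shrinks accordingly and each test spends less time in evaluation; the "val_set_size was too small" bullet suggests 0.02 was kept only where the resulting split still had enough rows. A back-of-the-envelope check, assuming a simple fractional split of 2,000 rows:

# Approximate eval-split sizes for a 2,000-row test dataset.
rows = 2000
for frac in (0.1, 0.05, 0.02):
    print(f"val_set_size={frac}: ~{int(rows * frac)} eval rows")
# val_set_size=0.1: ~200 eval rows
# val_set_size=0.05: ~100 eval rows
# val_set_size=0.02: ~40 eval rows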