
Merge pull request #235 from stochasticai/dev
Dev
StochasticRomanAgeev authored Jul 26, 2023
2 parents 5eac0c4 + 4587157 commit 99261f3
Showing 44 changed files with 717 additions and 162 deletions.
10 changes: 10 additions & 0 deletions .pre-commit-config.yaml
@@ -21,3 +21,13 @@ repos:
rev: v0.19.1
hooks:
- id: gitlint
- repo: https://github.com/PyCQA/autoflake
rev: v2.1.1
hooks:
- id: autoflake
args: ["--in-place", "--remove-all-unused-imports", "--ignore-init-module-imports"]
- repo: https://github.com/MarcoGorelli/absolufy-imports
rev: v0.3.1
hooks:
- id: absolufy-imports
args: ["--application-directories=.:src"]
72 changes: 66 additions & 6 deletions README.md
@@ -35,11 +35,71 @@ With `xTuring` you can,

## 🌟 What's new?
We are excited to announce the latest enhancements to our `xTuring` library:
1. __`Falcon LLM` integration__ - You can use and fine-tune the _`Falcon-7B`_ model in different configurations: _off-the-shelf_, _off-the-shelf with INT8 precision_, _LoRA fine-tuning_, and _LoRA fine-tuning with INT8 precision_.
2. __`GenericModel` wrapper__ - This new integration allows you to test and fine-tune any new model on `xTuring` without waiting for it to be integrated using class _`GenericModel`_.
1. __`LLaMA 2` integration__ - You can use and fine-tune the _`LLaMA 2`_ model in different configurations: _off-the-shelf_, _off-the-shelf with INT8 precision_, _LoRA fine-tuning_, _LoRA fine-tuning with INT8 precision_ and _LoRA fine-tuning with INT4 precision_. Use it through the `GenericModel` wrapper, or through the `Llama2` class from `xturing.models`, to test and fine-tune the model.
```python
from xturing.models import Llama2
model = Llama2()

## or
from xturing.models import BaseModel
model = BaseModel.create('llama2')

```
2. __`Evaluation`__ - Now you can evaluate any `Causal Language Model` on any dataset. The metric currently supported is [`perplexity`](https://towardsdatascience.com/perplexity-in-language-models-87a196019a94).
```python
# Make the necessary imports
from xturing.datasets import InstructionDataset
from xturing.models import BaseModel

# Load the desired dataset
dataset = InstructionDataset('../llama/alpaca_data')

# Load the desired model
model = BaseModel.create('gpt2')

# Run the Evaluation of the model on the dataset
result = model.evaluate(dataset)

# Print the result
print(f"Perplexity of the evalution: {result}")

```
3. __`INT4` Precision__ - You can now use and fine-tune any LLM with `INT4` precision using `GenericKbitModel`.
```python
# Make the necessary imports
from xturing.datasets import InstructionDataset
from xturing.models import GenericKbitModel

# Load the desired dataset
dataset = InstructionDataset('../llama/alpaca_data')

# Load the desired model for INT4 fine-tuning
model = GenericKbitModel('tiiuae/falcon-7b')

# Run the fine-tuning
model.finetune(dataset)
```
4. __CPU inference__ - Now you can run inference of any LLM on just your CPU. _CAUTION: CPU inference can be sluggish, since CPUs lack the computational capacity needed for efficient inference._
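A minimal sketch of CPU-only inference, assuming no GPU is available so the model runs on the CPU, and using `gpt2` purely as a small illustrative model:
```python
# Make the necessary imports
from xturing.models import BaseModel

# Load a small model; with no GPU available, it runs on the CPU
model = BaseModel.create("gpt2")

# Generate an output on the CPU (expect this to be slower than on a GPU)
outputs = model.generate(texts=["What is the meaning of life?"])

# Print the generated output
print(outputs)
```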
5. __Batch integration__ - By tweaking the `batch_size` argument of the `.generate()` and `.evaluate()` functions, you can speed up results. A `batch_size` greater than 1 typically improves processing efficiency.
```python
# Make the necessary imports
from xturing.datasets import InstructionDataset
from xturing.models import GenericKbitModel

# Load the desired dataset
dataset = InstructionDataset('../llama/alpaca_data')

# Load the desired model
model = GenericKbitModel('tiiuae/falcon-7b')

# Generate outputs on desired prompts
outputs = model.generate(dataset=dataset, batch_size=10)

```
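The same `batch_size` argument applies to evaluation. A minimal sketch, reusing the dataset and model from the snippet above and assuming the model supports `.evaluate()` on an instruction dataset as described in item 2:
```python
# Evaluate in batches as well; a larger batch typically runs faster when memory allows
perplexity = model.evaluate(dataset, batch_size=10)

# Print the resulting perplexity
print(perplexity)
```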

For a walkthrough of LoRA fine-tuning with INT4 precision, see the [Llama LoRA INT4 working example](examples/int4_finetuning/LLaMA_lora_int4.ipynb).

You can check the [Falcon LoRA INT8 working example](examples/falcon/falcon_lora_int8.py) to see how it works.
For extended insight, see the [GenericModel working example](examples/generic/generic_model.py) available in the repository.

<br>

@@ -170,8 +230,8 @@ model = BaseModel.load("x/distilgpt2_lora_finetuned_alpaca")
- [x] INT4 LLaMA LoRA fine-tuning with INT4 generation
- [x] Support for a `Generic model` wrapper
- [x] Support for `Falcon-7B` model
- [X] INT4 low-precision fine-tuning support
- [ ] Evaluation of LLM models
- [x] INT4 low-precision fine-tuning support
- [x] Evaluation of LLM models
- [ ] INT3, INT2, INT1 low-precision fine-tuning support
- [ ] Support for Stable Diffusion

11 changes: 7 additions & 4 deletions docs/docs/intro.md
@@ -39,13 +39,16 @@ You can quickly get started with xTuring by following the [Quickstart](/quicksta

| Model | Examples |
| --- | --- |
| LLaMA | [LLaMA 7B fine-tuning on Alpaca dataset with/without LoRA and with/without INT8](https://github.com/stochasticai/xturing/tree/main/examples/llama) |
| Bloom | [Bloom fine-tuning on Alpaca dataset with/without LoRA and with/without INT8](https://github.com/stochasticai/xturing/tree/main/examples/bloom) |
| Cerebras-GPT | [Cerebras-GPT fine-tuning on Alpaca dataset with/without LoRA and with/without INT8](https://github.com/stochasticai/xturing/tree/main/examples/cerebras) |
| Falcon | [Falcon 7B fine-tuning on Alpaca dataset with/without LoRA and with/without INT8](https://github.com/stochasticai/xturing/tree/main/examples/falcon) |
| Galactica | [Galactica fine-tuning on Alpaca dataset with/without LoRA and with/without INT8](https://github.com/stochasticai/xturing/tree/main/examples/galactica) |
| Generic Wrapper | [Any large language model fine-tuning on Alpaca dataset with/without LoRA and with/without INT8](https://github.com/stochasticai/xturing/tree/main/examples/generic) |
| GPT-J | [GPT-J 6B LoRA fine-tuning with/without INT8 ](https://github.com/stochasticai/xturing/tree/main/examples/gptj) |
| GPT-2 | [GPT-2 fine-tuning on Alpaca dataset with/without LoRA and with/without INT8](https://github.com/stochasticai/xturing/tree/main/examples/gpt2) |
| LLaMA | [LLaMA 7B fine-tuning on Alpaca dataset with/without LoRA and with/without INT8](https://github.com/stochasticai/xturing/tree/main/examples/llama) |
| LLaMA 2 | [LLaMA 2 7B fine-tuning on Alpaca dataset with/without LoRA and with/without INT8](https://github.com/stochasticai/xturing/tree/main/examples/llama2) |
| OPT | [OPT fine-tuning on Alpaca dataset with/without LoRA and with/without INT8](https://github.com/stochasticai/xturing/tree/main/examples/opt) |
| Cerebras-GPT | [Cerebras-GPT fine-tuning on Alpaca dataset with/without LoRA and with/without INT8](https://github.com/stochasticai/xturing/tree/main/examples/cerebras) |
| Galactica | [Galactica fine-tuning on Alpaca dataset with/without LoRA and with/without INT8](https://github.com/stochasticai/xturing/tree/main/examples/galactica) |
| Bloom | [Bloom fine-tuning on Alpaca dataset with/without LoRA and with/without INT8](https://github.com/stochasticai/xturing/tree/main/examples/bloom) |

xTuring is licensed under [Apache 2.0](https://github.com/stochasticai/xturing/blob/main/LICENSE)

15 changes: 15 additions & 0 deletions examples/evaluation/evaluation.py
@@ -0,0 +1,15 @@
# Make the necessary imports
from xturing.datasets import InstructionDataset
from xturing.models import BaseModel

# Load the desired dataset
dataset = InstructionDataset("../llama/alpaca_data")

# Load the desired model
model = BaseModel.create("gpt2")

# Run the Evaluation of the model on the dataset
result = model.evaluate(dataset)

# Print the result
print(f"Perplexity of the evalution: {result}")
7 changes: 3 additions & 4 deletions examples/int4_finetuning/LLaMA_lora_int4.ipynb
@@ -31,8 +31,7 @@
},
"outputs": [],
"source": [
"!pip install xturing --upgrade\n",
"!pip install xturing[int4] --upgrade"
"!pip install xturing --upgrade"
]
},
{
@@ -56,15 +55,15 @@
"outputs": [],
"source": [
"from xturing.datasets.instruction_dataset import InstructionDataset\n",
"from xturing.models import BaseModel\n",
"from xturing.models import GenericLoraKbitModel\n",
"from pytorch_lightning.loggers import WandbLogger\n",
"\n",
"# Initializes WandB integration \n",
"wandb_logger = WandbLogger()\n",
"\n",
"instruction_dataset = InstructionDataset(\"../llama/alpaca_data\")\n",
"# Initializes the model\n",
"model = BaseModel.create(\"llama_lora_int4\")"
"model = GenericLoraKbitModel('aleksickx/llama-7b-hf')"
]
},
{
21 changes: 21 additions & 0 deletions examples/llama2/llama2.py
@@ -0,0 +1,21 @@
# Make the necessary imports
from xturing.models import Llama2

# Load the model
model = Llama2()
# Generate outputs from the model
outputs = model.generate(texts=["How are you?"])
# Print the generated outputs
print(outputs)

## or

# Make the necessary imports
from xturing.models import BaseModel

# Load the model
model = BaseModel.create("llama2")
# Generate outputs from the model
outputs = model.generate(texts=["How are you?"])
# Print the generated outputs
print(outputs)
10 changes: 10 additions & 0 deletions examples/opt/opt_evaluate.py
@@ -0,0 +1,10 @@
from xturing.datasets.instruction_dataset import InstructionDataset
from xturing.models import BaseModel

instruction_dataset = InstructionDataset("../examples/llama/alpaca_data")
# Initializes the model
model = BaseModel.create("opt")
# Call the evaluate function
perplexity = model.evaluate(instruction_dataset, batch_size=5)

print(perplexity)
2 changes: 2 additions & 0 deletions requirements-dev.txt
@@ -1,2 +1,4 @@
pre-commit
pytest
autoflake
absolufy-imports
1 change: 0 additions & 1 deletion src/xturing/cli/chat.py
@@ -1,4 +1,3 @@
import time
from pathlib import Path

import click
31 changes: 31 additions & 0 deletions src/xturing/config/finetuning_config.yaml
Expand Up @@ -193,6 +193,7 @@ llama:
num_train_epochs: 3
optimizer_name: cpu_adam


llama_lora:
learning_rate: 1e-4
weight_decay: 0.01
@@ -227,6 +228,36 @@ llama_lora_kbit:
intra_save_freq: 200
groupsize: 128

llama2:
learning_rate: 5e-5
weight_decay: 0.01
num_train_epochs: 3
optimizer_name: cpu_adam

llama2_lora:
learning_rate: 5e-5
weight_decay: 0.01
num_train_epochs: 3
optimizer_name: cpu_adam

llama2_lora_int8:
learning_rate: 5e-5
weight_decay: 0.01
num_train_epochs: 3
optimizer_name: cpu_adam

llama2_int8:
learning_rate: 5e-5
weight_decay: 0.01
num_train_epochs: 3
optimizer_name: cpu_adam

llama2_lora_kbit:
learning_rate: 5e-5
weight_decay: 0.01
num_train_epochs: 3
optimizer_name: cpu_adam

opt:
learning_rate: 5e-5
weight_decay: 0.01
29 changes: 29 additions & 0 deletions src/xturing/config/generation_config.yaml
@@ -191,6 +191,35 @@ llama_lora_kbit:
max_new_tokens: 256
do_sample: false

# Contrastive search
llama2:
penalty_alpha: 0.6
top_k: 4
max_new_tokens: 256
do_sample: false

# Contrastive search
llama2_lora:
penalty_alpha: 0.6
top_k: 4
max_new_tokens: 256
do_sample: false

# Greedy search
llama2_int8:
max_new_tokens: 256
do_sample: false

# Greedy search
llama2_lora_int8:
max_new_tokens: 256
do_sample: false

# Greedy search
llama2_lora_kbit:
max_new_tokens: 256
do_sample: false

# Contrastive search
opt:
penalty_alpha: 0.6
11 changes: 7 additions & 4 deletions src/xturing/datasets/__init__.py
@@ -1,7 +1,10 @@
from .base import BaseDataset
from .instruction_dataset import InstructionDataset, InstructionDatasetMeta
from .text2image_dataset import Text2ImageDataset
from .text_dataset import TextDataset, TextDatasetMeta
from xturing.datasets.base import BaseDataset
from xturing.datasets.instruction_dataset import (
InstructionDataset,
InstructionDatasetMeta,
)
from xturing.datasets.text2image_dataset import Text2ImageDataset
from xturing.datasets.text_dataset import TextDataset, TextDatasetMeta

BaseDataset.add_to_registry(TextDataset.config_name, TextDataset)
BaseDataset.add_to_registry(InstructionDataset.config_name, InstructionDataset)
1 change: 0 additions & 1 deletion src/xturing/datasets/instruction_dataset.py
@@ -1,5 +1,4 @@
import json
import os
from dataclasses import dataclass
from pathlib import Path
from typing import List, Optional, Union
39 changes: 28 additions & 11 deletions src/xturing/engines/__init__.py
@@ -1,47 +1,63 @@
from .base import BaseEngine
from .bloom_engine import (
from xturing.engines.base import BaseEngine
from xturing.engines.bloom_engine import (
BloomEngine,
BloomInt8Engine,
BloomLoraEngine,
BloomLoraInt8Engine,
)
from .cerebras_engine import (
from xturing.engines.cerebras_engine import (
CerebrasEngine,
CerebrasInt8Engine,
CerebrasLoraEngine,
CerebrasLoraInt8Engine,
)
from .distilgpt2_engine import DistilGPT2Engine, DistilGPT2LoraEngine
from .falcon_engine import (
from xturing.engines.distilgpt2_engine import DistilGPT2Engine, DistilGPT2LoraEngine
from xturing.engines.falcon_engine import (
FalconEngine,
FalconInt8Engine,
FalconLoraEngine,
FalconLoraInt8Engine,
FalconLoraKbitEngine,
)
from .galactica_engine import (
from xturing.engines.galactica_engine import (
GalacticaEngine,
GalacticaInt8Engine,
GalacticaLoraEngine,
GalacticaLoraInt8Engine,
)
from .generic_engine import (
from xturing.engines.generic_engine import (
GenericEngine,
GenericInt8Engine,
GenericLoraEngine,
GenericLoraInt8Engine,
GenericLoraKbitEngine,
)
from .gpt2_engine import GPT2Engine, GPT2Int8Engine, GPT2LoraEngine, GPT2LoraInt8Engine
from .gptj_engine import GPTJEngine, GPTJInt8Engine, GPTJLoraEngine, GPTJLoraInt8Engine
from .llama_engine import (
from xturing.engines.gpt2_engine import (
GPT2Engine,
GPT2Int8Engine,
GPT2LoraEngine,
GPT2LoraInt8Engine,
)
from xturing.engines.gptj_engine import (
GPTJEngine,
GPTJInt8Engine,
GPTJLoraEngine,
GPTJLoraInt8Engine,
)
from xturing.engines.llama2_engine import LLama2Engine
from xturing.engines.llama_engine import (
LLamaEngine,
LLamaInt8Engine,
LlamaLoraEngine,
LlamaLoraInt8Engine,
LlamaLoraKbitEngine,
)
from .opt_engine import OPTEngine, OPTInt8Engine, OPTLoraEngine, OPTLoraInt8Engine
from xturing.engines.opt_engine import (
OPTEngine,
OPTInt8Engine,
OPTLoraEngine,
OPTLoraInt8Engine,
)

BaseEngine.add_to_registry(BloomEngine.config_name, BloomEngine)
BaseEngine.add_to_registry(BloomInt8Engine.config_name, BloomInt8Engine)
@@ -80,6 +96,7 @@
BaseEngine.add_to_registry(LlamaLoraEngine.config_name, LlamaLoraEngine)
BaseEngine.add_to_registry(LlamaLoraInt8Engine.config_name, LlamaLoraInt8Engine)
BaseEngine.add_to_registry(LlamaLoraKbitEngine.config_name, LlamaLoraKbitEngine)
BaseEngine.add_to_registry(LLama2Engine.config_name, LLama2Engine)
BaseEngine.add_to_registry(OPTEngine.config_name, OPTEngine)
BaseEngine.add_to_registry(OPTInt8Engine.config_name, OPTInt8Engine)
BaseEngine.add_to_registry(OPTLoraEngine.config_name, OPTLoraEngine)