Rename AnaLog to LogiX #96

Merged
6 commits, merged Apr 7, 2024
56 changes: 28 additions & 28 deletions README.md
@@ -1,5 +1,5 @@
<p align="center">
<a href="https://github.com/sangkeun00/analog/">
<a href="https://github.com/sangkeun00/logix/">
<img src="assets/logo_light.png" alt="" width="40%" align="top" style="border-radius: 10px; padding-left: 120px; padding-right: 120px; background-color: white;">
</a>
</p>
@@ -21,36 +21,36 @@
</div>

```bash
-git clone https://github.com/sangkeun00/analog.git; cd analog; pip install . # Install
+git clone https://github.com/sangkeun00/logix.git; cd logix; pip install . # Install
```

## Usage
-AnaLog is designed with the belief that diverse logs generated by neural networks, such as
+LogiX is designed with the belief that diverse logs generated by neural networks, such as
gradient and activation, can be utilized for analyzing and debugging data, algorithms,
-and other aspects. To use AnaLog, users simply adhere to a two-stage workflow:
+and other aspects. To use LogiX, users simply adhere to a two-stage workflow:

1. **Logging**: Extract and save various logs (e.g. per-sample gradient, activation) to disk.
2. **Analysis**: Load logs from disk and perform custom analysis (e.g. influence function).

### Logging
-Logging with AnaLog is as simple as adding one `with` statement to the existing
-training code. AnaLog automatically extracts user-specified logs using PyTorch hooks, and
+Logging with LogiX is as simple as adding one `with` statement to the existing
+training code. LogiX automatically extracts user-specified logs using PyTorch hooks, and
saves them to disk using a memory-mapped file.

```python
-import analog
+import logix

-run = analog.init(project="my_project") # initialize AnaLog
-analog.setup({"log": "grad", "save": "grad", "statistic": "kfac"}) # set logging config
-analog.watch(model) # add your model to log
+run = logix.init(project="my_project") # initialize LogiX
+logix.setup({"log": "grad", "save": "grad", "statistic": "kfac"}) # set logging config
+logix.watch(model) # add your model to log

for input, target in data_loader:
    with run(data_id=input): # set data_id for the log from the current batch
        out = model(input)
        loss = loss_fn(out, target, reduction="sum")
        loss.backward()
    model.zero_grad()
-analog.finalize() # finalize logging
+logix.finalize() # finalize logging
```

### Analysis
@@ -59,19 +59,19 @@ analysis the user may want. We have currently implemented influence function, which can be used
for both training data attribution and uncertainty quantification for AI safety.

```python
-from analog.analysis import InfluenceFunction
+from logix.analysis import InfluenceFunction

-analog.eval() # enter analysis mode
-log_loader = analog.build_log_dataloader() # return PyTorch DataLoader for log data
+logix.eval() # enter analysis mode
+log_loader = logix.build_log_dataloader() # return PyTorch DataLoader for log data

-with analog(data_id=test_input):
+with logix(data_id=test_input):
    test_out = model(test_input)
    test_loss = loss_fn(test_out, test_target, reduction="sum")
    test_loss.backward()
-test_log = analog.get_log() # extract a log for test data
+test_log = logix.get_log() # extract a log for test data

-analog.influence.compute_influence_all(test_log, log_loader) # data attribution
-analog.influence.compute_self_influence(test_log) # uncertainty
+logix.influence.compute_influence_all(test_log, log_loader) # data attribution
+logix.influence.compute_self_influence(test_log) # uncertainty
```

### HuggingFace Integration
@@ -82,12 +82,12 @@ frameworks (e.g. Lightning) in the future!

```python
from transformers import Trainer, Seq2SeqTrainer
-from analog.huggingface import patch_trainer, AnaLogArguments
+from logix.huggingface import patch_trainer, LogiXArguments

-analog_args = AnaLogArguments(project, config, lora=True, ekfac=True)
-AnaLogTrainer = patch_trainer(Trainer)
+logix_args = LogiXArguments(project, config, lora=True, ekfac=True)
+LogiXTrainer = patch_trainer(Trainer)

-trainer = AnaLogTrainer(analog_args=analog_args, # pass AnaLogArguments as TrainingArguments
+trainer = LogiXTrainer(logix_args=logix_args, # pass LogiXArguments as TrainingArguments
    model=model,
    train_dataset=train_dataset,
    *args,
@@ -106,7 +106,7 @@ Please check out [Examples](/examples) for more advanced features!
Logs from neural networks are difficult to handle due to the large size. For example,
the size of the gradient of *each* training datapoint is about as large as the whole model. Therefore,
we provide various systems support to efficiently scale neural network analysis to
-billion-scale models. Below are a few features that AnaLog currently supports:
+billion-scale models. Below are a few features that LogiX currently supports:

- **Gradient compression** (compression ratio: 1,000-100,000x)
- **Memory-map-based data IO**
@@ -120,20 +120,20 @@ billion-scale models. Below are a few features that AnaLog currently supports:
## Contributing

We welcome contributions from the community. Please see our [contributing
-guidelines](CONTRIBUTING.md) for details on how to contribute to AnaLog.
+guidelines](CONTRIBUTING.md) for details on how to contribute to LogiX.

## Citation
To cite this repository:

```
-@software{analog2024github,
+@software{logix2024github,
author = {Sang Keun Choe and Hwijeen Ahn and Juhan Bae and Minsoo Kang and Youngseog Chung and Kewen Zhao},
-title = {{AnaLog}: Scalable Logging and Analysis Tool for Neural Networks},
-url = {http://github.com/sangkeun00/analog},
+title = {{LogiX}: Scalable Logging and Analysis Tool for Neural Networks},
+url = {http://github.com/sangkeun00/logix},
version = {0.0.1},
year = {2024},
}
```

## License
-AnaLog is licensed under the [Apache 2.0 License](LICENSE).
+LogiX is licensed under the [Apache 2.0 License](LICENSE).
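The feature list in the README hunk above (gradient compression, memory-map-based data IO) appears without accompanying code. A minimal sketch of the compression-related switches that surface elsewhere in this PR; treating the `lora`/`ekfac` flags as the knobs behind gradient compression is an inference from their names, not something the diff states, and `"my_project"` / `"config.yaml"` are placeholders:

```python
import logix
from logix.huggingface import LogiXArguments, patch_trainer
from transformers import Trainer

# HuggingFace path: the same flags shown in the README's HF example above.
logix_args = LogiXArguments("my_project", "config.yaml", lora=True, ekfac=True)
LogiXTrainer = patch_trainer(Trainer)

# Plain PyTorch path: the LoRA switch also appears on the scheduler used in
# examples/bert_influence/extract_log.py later in this diff.
run = logix.init("my_project", config="config.yaml")
scheduler = logix.LogiXScheduler(run, lora=True)
for _ in scheduler:
    ...  # one logging pass per scheduler phase, as in extract_log.py
```

The memory-map and IO side appears to be driven by the YAML config; see the note after `examples/bert_influence/config.yaml` below.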
127 changes: 0 additions & 127 deletions analog/__init__.py

This file was deleted.

2 changes: 0 additions & 2 deletions analog/huggingface/__init__.py

This file was deleted.

20 changes: 0 additions & 20 deletions analog/test/config.yaml

This file was deleted.

16 changes: 8 additions & 8 deletions examples/bert_influence/compute_influence.py
@@ -3,8 +3,8 @@
import torch
import torch.nn.functional as F
from accelerate import Accelerator
-import analog
-from analog.analysis import InfluenceFunction
+import logix
+from logix.analysis import InfluenceFunction

from utils import construct_model, get_loaders

@@ -28,15 +28,15 @@ def main():
model, test_loader = accelerator.prepare(model, test_loader)

# Set-up AnaLog
-run = analog.init(args.project, config=args.config_path)
+run = logix.init(args.project, config=args.config_path)

-analog.watch(model)
-analog.initialize_from_log()
-log_loader = analog.build_log_dataloader()
+logix.watch(model)
+logix.initialize_from_log()
+log_loader = logix.build_log_dataloader()

# influence analysis
-analog.setup({"log": "grad"})
-analog.eval()
+logix.setup({"log": "grad"})
+logix.eval()
for batch in test_loader:
data_id = tokenizer.batch_decode(batch["input_ids"])
labels = batch.pop("labels").view(-1)
2 changes: 1 addition & 1 deletion examples/bert_influence/config.yaml
@@ -1,4 +1,4 @@
-root_dir: ./analog
+root_dir: ./logix
logging:
flush_threshold: 1000000000
cpu_offload: true
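This is the file the example scripts pass to `logix.init` via `--config_path` (see `compute_influence.py` above and `extract_log.py` below). A short sketch of how it is consumed; the per-field comments are guesses read off the field names and the README's feature list, not documentation from this diff, and `"bert_influence"` / `"config.yaml"` are placeholders:

```python
import logix

# The YAML config is passed by path and read during initialization.
run = logix.init("bert_influence", config="config.yaml")

# Fields shown in this PR's config.yaml (semantics assumed, not documented here):
#   root_dir: ./logix           -> output directory for the memory-mapped log files
#   logging.flush_threshold     -> bytes buffered before logs are flushed to disk
#   logging.cpu_offload: true   -> keep logged statistics on CPU rather than on GPU
```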
10 changes: 5 additions & 5 deletions examples/bert_influence/extract_log.py
@@ -4,7 +4,7 @@

import torch.nn.functional as F
from accelerate import Accelerator
-import analog
+import logix

from utils import construct_model, get_loaders, set_seed

@@ -30,10 +30,10 @@ def main():
model, train_loader = accelerator.prepare(model, train_loader)

# AnaLog
-run = analog.init(args.project, config=args.config_path)
-scheduler = analog.AnaLogScheduler(run, lora=True)
+run = logix.init(args.project, config=args.config_path)
+scheduler = logix.LogiXScheduler(run, lora=True)

-analog.watch(model)
+logix.watch(model)
for _ in scheduler:
for batch in tqdm(train_loader, desc="Hessian logging"):
data_id = tokenizer.batch_decode(batch["input_ids"])
@@ -47,7 +47,7 @@
logits, labels, reduction="sum", ignore_index=-100
)
accelerator.backward(loss)
-analog.finalize()
+logix.finalize()


if __name__ == "__main__":
2 changes: 1 addition & 1 deletion examples/bert_influence/qualitative_analysis.py
@@ -11,7 +11,7 @@
)

# score
score_path = "if_analog.pt"
score_path = "if_logix.pt"
scores = torch.load(score_path, map_location="cpu")
print(scores.shape)

14 changes: 7 additions & 7 deletions examples/cifar_influence/compare.py
@@ -2,14 +2,14 @@
import torch


-analog_kfac = torch.load("if_baseline.pt")
-analog_lora_pca = torch.load("if_analog_pca.pt")
+logix_kfac = torch.load("if_baseline.pt")
+logix_lora_pca = torch.load("if_logix_pca.pt")
print(
"[KFAC (analog) vs LoRA-pca (analog)] pearson:",
pearsonr(analog_kfac, analog_lora_pca),
"[KFAC (logix) vs LoRA-pca (logix)] pearson:",
pearsonr(logix_kfac, logix_lora_pca),
)
-analog_lora_random = torch.load("if_analog_lora.pt")
+logix_lora_random = torch.load("if_logix_lora.pt")
print(
"[KFAC (analog) vs LoRA-random (analog)] pearson:",
pearsonr(analog_kfac, analog_lora_random),
"[KFAC (logix) vs LoRA-random (logix)] pearson:",
pearsonr(logix_kfac, logix_lora_random),
)
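`compare.py` correlates three influence-score tensors that are not produced anywhere in this diff. A sketch of how such files would presumably be written, assuming `compute_influence_all` returns a score tensor and that saving is done with `torch.save`; neither the return value nor the saving step is shown in this PR, and `log_loader` / `test_log` stand for the objects built earlier in `compute_influence.py`:

```python
import torch
import logix

# After an extraction/scoring run for a given configuration (baseline KFAC,
# LoRA-PCA, or LoRA-random), persist the scores so compare.py can correlate them.
test_log = logix.get_log()
scores = logix.influence.compute_influence_all(test_log, log_loader)
torch.save(scores, "if_logix_pca.pt")  # name matches one of the files loaded above
```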