From c2e371417fa874e33c208309d72b27a887db43c2 Mon Sep 17 00:00:00 2001 From: Hynn01 Date: Mon, 23 May 2022 11:57:33 +0200 Subject: [PATCH 1/6] fix unnecessary-iteration-pandas checker --- dslinter/checkers/unnecessary_iteration_pandas.py | 2 +- dslinter/tests/checkers/test_unnecessary_iteration_pandas.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dslinter/checkers/unnecessary_iteration_pandas.py b/dslinter/checkers/unnecessary_iteration_pandas.py index a73f782..ebbe982 100644 --- a/dslinter/checkers/unnecessary_iteration_pandas.py +++ b/dslinter/checkers/unnecessary_iteration_pandas.py @@ -54,7 +54,7 @@ def visit_call(self, node: astroid.Call): """ try: if self._iterating_through_dataframe(node): - self.add_message("dataframe-iteration-modification-pandas", node=node) + self.add_message("unnecessary-iteration-pandas", node=node) except: # pylint: disable=bare-except ExceptionHandler.handle(self, node) diff --git a/dslinter/tests/checkers/test_unnecessary_iteration_pandas.py b/dslinter/tests/checkers/test_unnecessary_iteration_pandas.py index 11ed824..74eaf5d 100644 --- a/dslinter/tests/checkers/test_unnecessary_iteration_pandas.py +++ b/dslinter/tests/checkers/test_unnecessary_iteration_pandas.py @@ -19,7 +19,7 @@ def test_iterating_through_dataframe(self): """ module_tree = astroid.parse(script) call = module_tree.body[-1].iter - with self.assertAddsMessages(pylint.testutils.MessageTest(msg_id="dataframe-iteration-modification-pandas", node=call),): + with self.assertAddsMessages(pylint.testutils.MessageTest(msg_id="unnecessary-iteration-pandas", node=call),): self.checker.visit_module(module_tree) self.checker.visit_call(call) From a3c65c06c34e8ddd0df18d65392afd6e95e5a7bb Mon Sep 17 00:00:00 2001 From: Haiyin Zhang Date: Mon, 23 May 2022 21:01:30 +0200 Subject: [PATCH 2/6] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 2ca8c1d..35b92af 100644 --- a/README.md 
+++ b/README.md @@ -139,7 +139,7 @@ poetry run pytest . - **W5516 | forward-pytorch | Net Forward Checker(PyTorch)**: It is recommended to use self.net() rather than self.net.forward() in PyTorch code. If self.net.forward() is used in the code, the rule is violated. -- **W5517 | gradient-clear-pytorch | Gradient Clear Checker(PyTorch)**: The loss_fn.backward() and optimizer.step() should be used together with optimizer.zero_grad(). If the ".backward()" is missing in the code, the rule is violated. +- **W5517 | gradient-clear-pytorch | Gradient Clear Checker(PyTorch)**: The loss_fn.backward() and optimizer.step() should be used together with optimizer.zero_grad(). If the `.zero_grad()` is missing in the code, the rule is violated. - **W5518 | data-leakage-scikitlearn | Data Leakage Checker(ScikitLearn)**: All scikit-learn estimators should be used inside Pipelines, to prevent data leakage between training and test data. From c07875a8a389edebf9f9c4a8fb00c7c3880b9091 Mon Sep 17 00:00:00 2001 From: Haiyin Zhang Date: Mon, 23 May 2022 21:07:54 +0200 Subject: [PATCH 3/6] Update STEPS_TO_FOLLOW.md --- STEPS_TO_FOLLOW.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/STEPS_TO_FOLLOW.md b/STEPS_TO_FOLLOW.md index cb3679d..4faa414 100644 --- a/STEPS_TO_FOLLOW.md +++ b/STEPS_TO_FOLLOW.md @@ -10,6 +10,8 @@ Install `dslinter` from the Python Package Index: pip install dslinter ``` ### STEP 2 +An `__init__.py` file (can be empty) is expected in the folder. 
+ Copy the following command in your terminal, type in the path to your project, and press `enter` to run: [For Linux/Mac OS Users]: From d5fefd85943e13e253b8b62107e3af679896f273 Mon Sep 17 00:00:00 2001 From: Haiyin Zhang Date: Mon, 23 May 2022 21:12:11 +0200 Subject: [PATCH 4/6] Update STEPS_TO_FOLLOW.md --- STEPS_TO_FOLLOW.md | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/STEPS_TO_FOLLOW.md b/STEPS_TO_FOLLOW.md index 4faa414..f45d64a 100644 --- a/STEPS_TO_FOLLOW.md +++ b/STEPS_TO_FOLLOW.md @@ -39,9 +39,41 @@ pylint --load-plugins=dslinter --disable=all --enable=import,unnecessary-iterati ``` ## For Notebook: + +### STEP 1 For notebook, we need to convert it to Python file first and run `dslinter` on the Python file. To convert the notebook to Python file, run: ``` jupyter nbconvert --to script ``` -Then following the two steps mentioned above for Python project. +### STEP 2 +Install `dslinter` from the Python Package Index: +``` +pip install dslinter +``` +### STEP 3 +Copy the following command in your terminal, type in the path to your project, and press `enter` to run: + +[For Linux/Mac OS Users]: +``` +pylint \ +--load-plugins=dslinter \ +--disable=all \ +--enable=import,unnecessary-iteration-pandas,unnecessary-iteration-tensorflow,\ +nan-numpy,chain-indexing-pandas,datatype-pandas,\ +column-selection-pandas,merge-parameter-pandas,inplace-pandas,\ +dataframe-conversion-pandas,scaler-missing-scikitlearn,hyperparameters-scikitlearn,\ +hyperparameters-tensorflow,hyperparameters-pytorch,memory-release-tensorflow,\ +deterministic-pytorch,randomness-control-numpy,randomness-control-scikitlearn,\ +randomness-control-tensorflow,randomness-control-pytorch,randomness-control-dataloader-pytorch,\ +missing-mask-tensorflow,missing-mask-pytorch,tensor-array-tensorflow,\ +forward-pytorch,gradient-clear-pytorch,data-leakage-scikitlearn,\ 
+dependent-threshold-scikitlearn,dependent-threshold-tensorflow,dependent-threshold-pytorch \ +--output-format=json:report.json,text:report.txt,colorized \ +--reports=y \ + +``` +[For Windows Users]: +``` +pylint --load-plugins=dslinter --disable=all --enable=import,unnecessary-iteration-pandas,unnecessary-iteration-tensorflow,nan-numpy,chain-indexing-pandas,datatype-pandas,column-selection-pandas,merge-parameter-pandas,inplace-pandas,dataframe-conversion-pandas,scaler-missing-scikitlearn,hyperparameters-scikitlearn,hyperparameters-tensorflow,hyperparameters-pytorch,memory-release-tensorflow,deterministic-pytorch,randomness-control-numpy,randomness-control-scikitlearn,randomness-control-tensorflow,randomness-control-pytorch,randomness-control-dataloader-pytorch,missing-mask-tensorflow,missing-mask-pytorch,tensor-array-tensorflow,forward-pytorch,gradient-clear-pytorch,data-leakage-scikitlearn,dependent-threshold-scikitlearn,dependent-threshold-tensorflow,dependent-threshold-pytorch --output-format=json:report.json,text:report.txt,colorized --reports=y +``` From 686f09859bd389e164a24476a4e443ee2eaf08c0 Mon Sep 17 00:00:00 2001 From: Haiyin Zhang Date: Mon, 23 May 2022 21:13:35 +0200 Subject: [PATCH 5/6] Update STEPS_TO_FOLLOW.md --- STEPS_TO_FOLLOW.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/STEPS_TO_FOLLOW.md b/STEPS_TO_FOLLOW.md index f45d64a..6f5ee9f 100644 --- a/STEPS_TO_FOLLOW.md +++ b/STEPS_TO_FOLLOW.md @@ -1,6 +1,6 @@ ### Here are the steps to follow for the evaluation :) -We recommend you wrap your project in a parent folder and run the following command on that folder. The output **txt** file, by default, will be generated at the folder where you run your command on. +We recommend you wrap your project (or Jupyter notebook) in a parent folder and run the following command on that folder. The output **txt** file, by default, will be generated in the folder where you run the command. 
## For Python Project: From 054b755d510773bde460f4eed868dc4e70d1297b Mon Sep 17 00:00:00 2001 From: Hynn01 Date: Mon, 23 May 2022 22:01:56 +0200 Subject: [PATCH 6/6] update dslinter version --- pyproject.toml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 33c10b0..37f2150 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,7 +22,7 @@ skip = 'scripts' [tool.poetry] name = "dslinter" -version = "2.0.5" +version = "2.0.7" description = "`dslinter` is a pylint plugin for linting data science and machine learning code. We plan to support the following Python libraries: TensorFlow, PyTorch, Scikit-Learn, Pandas, NumPy and SciPy." license = "GPL-3.0 License" @@ -52,11 +52,11 @@ toml = "^0.10" # cleo = { git = "https://github.com/sdispater/cleo.git", branch = "master" } # Optional dependencies (extras) # pendulum = { version = "^1.4", optional = true } -pylint = { version = "2.12.2" } -astroid = { version = "2.9.3" } -mypy = { version = "0.931" } -data-science-types = { version = "0.2.23" } -pyspark-stubs = {version = "3.0.0.post3" } +pylint = { version = "~2.12.2" } +astroid = { version = "~2.9.3" } +mypy = { version = "~0.931" } +data-science-types = { version = "~0.2.23" } +pyspark-stubs = {version = "~3.0.0.post3" } [tool.poetry.dev-dependencies] pytest = "^3.0"