initial commit for audit logs #160

Closed · wants to merge 5 commits · showing changes from 1 commit
8 changes: 4 additions & 4 deletions docs/source/audit_logs/tables.rst
@@ -11,7 +11,7 @@ We store the audit logs in :class:`AuditLog <piccolo_api.audit_logs.tables.AuditLog>`
Migrations
----------

-We recommend creating ``audit_logs`` tables using migrations.
+We recommend creating the ``audit_logs`` tables using migrations.

You can add ``piccolo_api.audit_logs.piccolo_app`` to the ``apps`` arguments
of the :class:`AppRegistry <piccolo.conf.apps.AppRegistry>` in ``piccolo_conf.py``.
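
For example, a minimal ``piccolo_conf.py`` might look like this (a sketch; the
``SQLiteEngine`` settings and app list are placeholders):

.. code-block:: python

    from piccolo.conf.apps import AppRegistry
    from piccolo.engine.sqlite import SQLiteEngine

    DB = SQLiteEngine(path="my_app.sqlite")

    APP_REGISTRY = AppRegistry(
        apps=[
            "piccolo_api.audit_logs.piccolo_app",
            "piccolo.apps.user.piccolo_app",
        ]
    )

The migrations can then be run with ``piccolo migrations forwards audit_logs``
(assuming the app is registered under the name ``audit_logs``).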
@@ -45,17 +45,17 @@ do this instead:
.. code-block:: python

    from piccolo_api.audit_logs.tables import AuditLog
-    from piccolo.tables import create_tables
+    from piccolo.tables import create_db_tables_sync

-    create_tables(AuditLog, if_not_exists=True)
+    create_db_tables_sync(AuditLog, if_not_exists=True)

-------------------------------------------------------------------------------

Source
------

AuditLog
-~~~~~~~~~~~~
+~~~~~~~~

.. currentmodule:: piccolo_api.audit_logs.tables

@@ -1,11 +1,11 @@
from enum import Enum

from piccolo.apps.migrations.auto.migration_manager import MigrationManager
-from piccolo.columns.column_types import Text, Timestamp, Varchar
+from piccolo.columns.column_types import JSON, Text, Timestamp, Varchar
from piccolo.columns.defaults.timestamp import TimestampNow
from piccolo.columns.indexes import IndexMethod

ID = "2022-06-25T17:11:22:238052"
ID = "2022-06-26T22:48:09:433352"
VERSION = "0.80.0"
DESCRIPTION = ""

@@ -106,4 +106,24 @@ async def forwards():
        },
    )

+    manager.add_column(
+        table_class_name="AuditLog",
+        tablename="audit_log",
+        column_name="changes_in_row",
+        db_column_name="changes_in_row",
+        column_class_name="JSON",
+        column_class=JSON,
+        params={
+            "default": "{}",
+            "null": False,
+            "primary_key": False,
+            "unique": False,
+            "index": False,
+            "index_method": IndexMethod.btree,
+            "choices": None,
+            "db_column_name": None,
+            "secret": False,
+        },
+    )
+
    return manager
25 changes: 16 additions & 9 deletions piccolo_api/audit_logs/tables.py
@@ -1,10 +1,9 @@
import typing as t
+import uuid
-from datetime import datetime
from enum import Enum

from piccolo.apps.user.tables import BaseUser
-from piccolo.columns import Text, Timestamp, Varchar
+from piccolo.columns import JSON, Text, Timestamp, Varchar
from piccolo.table import Table


@@ -20,9 +19,15 @@ class ActionType(str, Enum):
    action_type = Varchar(choices=ActionType)
    action_user = Varchar()
    change_message = Text()
dantownsend (Member) · Jun 26, 2022:
I think one really nice thing would be recording which changes were made. There could be a JSON field which would have something like {'name': 'bob'} if the name field was changed to 'bob'.

sinisaos (Member Author):
Great idea. I will try to implement it.

+    changes_in_row = JSON()

    @classmethod
-    async def post_save_action(cls, table: t.Type[Table], user_id: int):
+    async def record_save_action(
+        cls,
+        table: t.Type[Table],
+        user_id: int,
+        new_row_id: t.Union[str, uuid.UUID, int],
+    ):
        """
        A method for recording row creation actions.

@@ -32,20 +37,21 @@ async def post_save_action(cls, table: t.Type[Table], user_id: int):
            The ``primary key`` of the authenticated user.
        """
        result = cls(
-            action_time=datetime.now(),
            action_type=cls.ActionType.CREATING,
            action_user=cls.get_user_username(user_id),
            change_message=f"User {cls.get_user_username(user_id)} "
-            f"create new row in {table._meta.tablename.title()} table",
+            f"create row {new_row_id} in {table._meta.tablename.title()} "
+            f"table",
        )
        await result.save().run()

    @classmethod
-    async def post_patch_action(
+    async def record_patch_action(
        cls,
        table: t.Type[Table],
        row_id: t.Union[str, uuid.UUID, int],
        user_id: int,
+        changes_in_row: t.Dict[str, t.Any],
    ):
        """
        A method for recording row update actions.
@@ -57,18 +63,20 @@ async def post_patch_action(
            monitor activities.
        :param user_id:
            The ``primary key`` of the authenticated user.
+        :param changes_in_row:
+            JSON with all changed columns in the existing row.
        """
        result = cls(
-            action_time=datetime.now(),
            action_type=cls.ActionType.UPDATING,
            action_user=cls.get_user_username(user_id),
            change_message=f"User {cls.get_user_username(user_id)} update row "
            f"{row_id} in {table._meta.tablename.title()} table",
+            changes_in_row=changes_in_row,
        )
        await result.save().run()

    @classmethod
-    async def post_delete_action(
+    async def record_delete_action(
        cls,
        table: t.Type[Table],
        row_id: t.Union[str, uuid.UUID, int],
@@ -86,7 +94,6 @@ async def post_delete_action(
            The ``primary key`` of the authenticated user.
        """
        result = cls(
-            action_time=datetime.now(),
            action_type=cls.ActionType.DELETING,
            action_user=cls.get_user_username(user_id),
            change_message=f"User {cls.get_user_username(user_id)} delete row "
dantownsend (Member):

I'd be tempted to get rid of the change message altogether.

One option is just storing everything in JSON.

For delete, the json can be:

// Maps to a list, so we can also use it for a bulk delete endpoint.
{
    "deleted_ids": [1, 2]
}

Likewise, for the save, it could be:

// Maps to a list, so we can also use it for a bulk creation endpoint.
{
    "created_ids": [1, 2]
}

Or we have a field called effected_rows which is an integer or array of integers.

It's just about making it easy to query - imagine you wanted to know who deleted row 42. You would have to do something like:

await AuditLog.select().where(
    AuditLog.action_type == AuditLog.ActionType.DELETING,
    AuditLog.change_message.like('%42%')
)

It's quite imprecise, and it would be better to just do:

await AuditLog.select(AuditLog.action_user).where(
    AuditLog.action_type == AuditLog.ActionType.DELETING,
    AuditLog.effected_rows.any(42)
)

dantownsend (Member) · Jun 27, 2022:

Similarly, table name should probably be a column too, to make it easier to filter out the changes on a certain table. I'd be tempted to add table_name and table_class - again, just to make it easy to filter. What do you think?

sinisaos (Member Author):

> I'd be tempted to get rid of the change message altogether.
>
> One option is just storing everything in JSON.
>
> For delete, the json can be:
>
> // Maps to a list, so we can also use it for a bulk delete endpoint.
> {
>     "deleted_ids": [1, 2]
> }
>
> Likewise, for the save, it could be:
>
> // Maps to a list, so we can also use it for a bulk creation endpoint.
> {
>     "created_ids": [1, 2]
> }

But all actions are for endpoints that work with a single record. We don't have the bulk_delete option yet (#145) and we don't have bulk_creation at all. Audit logs would be primarily used in Piccolo Admin and there we delete multiple records by looping through the delete_single endpoint.

> Or we have a field called effected_rows which is an integer or array of integers.

That would be possible if we only had the primary_key type Serial. But we also have primary_key types string and UUID, and we can't use an Integer column for those (which would be great for precise filtering). We would need to use Varchar, and the query would be very similar for effected_rows and change_message, using like.

> It's just about making it easy to query - imagine you wanted to know who deleted row 42. You would have to do something like:
>
> await AuditLog.select().where(
>     AuditLog.action_type == AuditLog.ActionType.DELETING,
>     AuditLog.change_message.like('%42%')
> )
>
> It's quite imprecise, and it would be better to just do:
>
> await AuditLog.select(AuditLog.action_user).where(
>     AuditLog.action_type == AuditLog.ActionType.DELETING,
>     AuditLog.effected_rows.any(42)
> )

Here is a screenshot with the new columns:

[screenshot: changed_columns]

I don't think we should remove the change_message column because it nicely describes the action the user has taken. Filtering is maybe imprecise, but we can't use the Integer column for effected_row.

> Similarly, table name should probably be a column too, to make it easier to filter out the changes on a certain table. I'd be tempted to add table_name and table_class - again, just to make it easy to filter. What do you think?

I added table_name, but I don't understand what the table_class column would be for, and what should go into that column?

dantownsend (Member):

Good points.

> But all actions are for endpoints that work with a single record. We don't have the bulk_delete option yet (#145) and we don't have bulk_creation at all. Audit logs would be primarily used in Piccolo Admin and there we delete multiple records by looping through the delete_single endpoint.

You're right, there are no bulk action endpoints at the moment. But I was just thinking about the future. Imagine someone deletes 1000 rows - we would then have to insert 1000 rows into the audit table.

> That would be possible if we only had the primary_key type Serial. But we also have primary_key types string and UUID, and we can't use an Integer column for those (which would be great for precise filtering). We would need to use Varchar, and the query would be very similar for effected_rows and change_message, using like.

Good point - the primary key might not be an integer. Storing them all as strings is OK.

> I don't think we should remove the change_message column because it nicely describes the action the user has taken. Filtering is maybe imprecise, but we can't use the Integer column for effected_row.

Maybe change_message could be achieved using get_readable instead.

> I added table_name, but I don't understand what the table_class column would be for, and what should go into that column?

I was just thinking that if there's a table class called MyTable, and it refers to the database table my_table, maybe we would want to store them both. But it's not that important. Just need to decide which one is best to store - my_table or MyTable.
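
For reference, a minimal sketch of the get_readable idea mentioned above (the Movie table here is just an example):

from piccolo.columns import Varchar
from piccolo.columns.readable import Readable
from piccolo.table import Table

class Movie(Table):
    name = Varchar()

    @classmethod
    def get_readable(cls) -> Readable:
        # A human-readable representation of a row, which an audit
        # entry could reuse instead of a hand-built change_message.
        return Readable(template="%s", columns=[cls.name])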

sinisaos (Member Author) · Jun 28, 2022:

> You're right, there are no bulk action endpoints at the moment. But I was just thinking about the future. Imagine someone deletes 1000 rows - we would then have to insert 1000 rows into the audit table.

I agree, but we don't currently have the classic bulk delete in Piccolo Admin (we loop through the delete_single endpoint), and audit logs would primarily be used in Piccolo Admin (the page size limit is 100 per page, which is not so bad).
Once we have a proper bulk_delete, we can change the delete action for the audit logs to something like this:

# in piccolo_api.crud.endpoints.py
@apply_validators
async def delete_bulk(
self, request: Request, params: t.Optional[t.Dict[str, t.Any]] = None
) -> Response:
    """
    Bulk deletes rows - query parameters are used for filtering.
    """
    params = self._clean_data(params) if params else {}
    split_params = self._split_params(params)
    split_params_ids = split_params.ids.split(",")
  
    try:
        query: t.Union[
            Select, Count, Objects, Delete
        ] = self.table.delete()
        try:
            # Serial or UUID primary keys enabled in query params
            value_type = self.table._meta.primary_key.value_type
            ids = [value_type(item) for item in split_params_ids]
            query_ids = query.where(
                self.table._meta.primary_key.is_in(ids)
            )
            query = self._apply_filters(query_ids, split_params)
            # this is not tested, but it should work;
            # the ids list can contain one or more items
            deleted_rows_map = {"deleted_ids": ids}
            try:
                if self.audit_log_table:
                    await self.audit_log_table.record_delete_action(
                        self.table,
                        deleted_rows_pk=deleted_rows_map,
                        user_id=request.user.user_id,
                    )
            except Exception as exception:
                logger.log(msg=f"{exception}", level=logging.WARNING)
        except ValueError:
            query = self._apply_filters(query, split_params)
    except MalformedQuery as exception:
        return Response(str(exception), status_code=400)

    await query.run()
    return Response(status_code=204)

# in piccolo_api.audit_logs.tables.py

class AuditLog(Table):
    ...
    deleted_rows = JSON()
    ...
    @classmethod
    async def record_delete_action(
        cls,
        table: t.Type[Table],
        deleted_rows_pk: t.Dict[str, t.Any],
        user_id: int,
    ):
        result = cls(
            action_type=cls.ActionType.DELETING,
            action_user=cls.get_user_username(user_id),
            deleted_rows=deleted_rows_pk,
        )
        await result.save().run()

But for now we don't have that option, because we're looping through the delete_single endpoint and we can't store multiple IDs in a row like this:

> // Maps to a list, so we can also use it for a bulk delete endpoint.
> {
>     "deleted_ids": [1, 2]
> }

We should stick to what we have at the moment, and we can always change it later (when we have bulk_delete, we will also have to make changes in Piccolo Admin).

> I was just thinking that if there's a table class called MyTable, and it refers to the database table my_table, maybe we would want to store them both. But it's not that important. Just need to decide which one is best to store - my_table or MyTable.

I think table_name is a better option, because it's a Varchar and we can use the filter sidebar to search and filter records.

70 changes: 48 additions & 22 deletions piccolo_api/crud/endpoints.py
@@ -149,6 +149,7 @@ def __init__(
        schema_extra: t.Optional[t.Dict[str, t.Any]] = None,
        max_joins: int = 0,
        hooks: t.Optional[t.List[Hook]] = None,
+        audit_log_table: t.Optional[t.Type[AuditLog]] = None,
    ) -> None:
        """
        :param table:
@@ -218,6 +219,7 @@ def __init__(
            }
        else:
            self._hook_map = None  # type: ignore
+        self.audit_log_table = audit_log_table or AuditLog
dantownsend (Member):

There's no need for the or here - the user should be allowed to set it to None.

sinisaos (Member Author):

But without that we cannot add records to the AuditLog table (or I'm doing something wrong). If the user doesn't want to use the audit_logs app, they just don't register it in APP_REGISTRY and can use PiccoloCRUD without audit_logs.

dantownsend (Member):

Even if people don't register the audit_logs app, PiccoloCRUD will still be trying to make database queries to the AuditLog table, so we need to be able to set it to None to prevent this. I think when people use PiccoloCRUD standalone within their apps they probably won't need the audit logs - it's mostly for Piccolo Admin.

In create_admin we could add an enable_auditing argument - any tables listed will have auditing switched on in PiccoloCRUD.

def create_admin(..., audit_log_table: AuditLog = AuditLog, enable_auditing: t.List[t.Type[Table]] = [])
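
A hypothetical usage sketch of that proposed signature (neither audit_log_table nor enable_auditing exists yet; the Movie and Director tables are made up):

from piccolo_admin.endpoints import create_admin
from piccolo_api.audit_logs.tables import AuditLog

from movies.tables import Director, Movie  # made-up example tables

# Hypothetical: both keyword arguments below are only proposals.
admin = create_admin(
    [Movie, Director],
    audit_log_table=AuditLog,  # proposed: where the log rows are written
    enable_auditing=[Movie],   # proposed: only changes to Movie are audited
)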

sinisaos (Member Author):

Good point. Later today I will try to do that in Piccolo Admin.

sinisaos (Member Author):

You are right about this. We need to add a few lines of code to Piccolo Admin and everything works great, and we can choose which tables we want audit logs for via the future enable_auditing argument in create_admin.


        schema_extra = schema_extra if isinstance(schema_extra, dict) else {}
        self.visible_fields_options = get_visible_fields_options(
@@ -335,6 +337,22 @@ def pydantic_model_plural(
            rows=(t.List[base_model], None),
        )

+    def get_single_row(
+        self,
+        table: t.Type[Table],
+        row_id: t.Union[str, uuid.UUID, int],
+    ) -> t.Dict[str, t.Any]:
+        """
+        Return a single row.
+        """
+        row = (
+            self.table.select(exclude_secrets=self.exclude_secrets)
+            .where(self.table._meta.primary_key == row_id)
+            .first()
+            .run_sync()
+        )
+        return row
+
    @apply_validators
    async def get_schema(self, request: Request) -> JSONResponse:
        """
@@ -813,13 +831,17 @@ async def post_single(
        row = await execute_post_hooks(
            hooks=self._hook_map, hook_type=HookType.pre_save, row=row
        )
-        try:
-            await AuditLog.post_save_action(
-                self.table, user_id=request.user.user_id
-            )
-        except AssertionError:
-            pass
        response = await row.save().run()
+        new_row_id = list(response[0].values())
+        try:
+            if self.audit_log_table:
+                await self.audit_log_table.record_save_action(
+                    self.table,
+                    user_id=request.user.user_id,
+                    new_row_id=new_row_id[0],
+                )
+        except Exception as exception:
+            logger.log(msg=f"{exception}", level=logging.WARNING)
        json = dump_json(response)
        # Returns the id of the inserted row.
        return CustomJSONResponse(json, status_code=201)
@@ -1064,21 +1086,24 @@ async def patch_single(
            )

        try:
+            old_row = self.get_single_row(cls, row_id)
            await cls.update(values).where(
                cls._meta.primary_key == row_id
            ).run()
+            new_row = self.get_single_row(cls, row_id)
+            changes_in_row = {
+                k: v for k, v in new_row.items() - old_row.items()
+            }
            try:
-                await AuditLog.post_patch_action(
-                    cls, row_id=row_id, user_id=request.user.user_id
-                )
-            except AssertionError:
-                pass
-            new_row = (
-                await cls.select(exclude_secrets=self.exclude_secrets)
-                .where(cls._meta.primary_key == row_id)
-                .first()
-                .run()
-            )
+                if self.audit_log_table:
+                    await self.audit_log_table.record_patch_action(
+                        cls,
+                        row_id=row_id,
+                        user_id=request.user.user_id,
+                        changes_in_row=changes_in_row,
+                    )
+            except Exception as exception:
+                logger.log(msg=f"{exception}", level=logging.WARNING)
            return CustomJSONResponse(self.pydantic_model(**new_row).json())
        except ValueError:
            return Response("Unable to save the resource.", status_code=500)
@@ -1103,11 +1128,12 @@ async def delete_single(
                self.table._meta.primary_key == row_id
            ).run()
            try:
-                await AuditLog.post_delete_action(
-                    self.table, row_id=row_id, user_id=request.user.user_id
-                )
-            except AssertionError:
-                pass
+                if self.audit_log_table:
+                    await self.audit_log_table.record_delete_action(
+                        self.table, row_id=row_id, user_id=request.user.user_id
+                    )
+            except Exception as exception:
+                logger.log(msg=f"{exception}", level=logging.WARNING)
            return Response(status_code=204)
        except ValueError:
            return Response("Unable to delete the resource.", status_code=500)
24 changes: 19 additions & 5 deletions tests/audit_logs/test_audit_logs.py
@@ -39,7 +39,11 @@ def test_save_audit_logs(self):
        json = {"name": "Star Wars", "rating": 93}

        response = client.post("/", json=json)
-        run_sync(AuditLog.post_save_action(Movie, user_id=user.id))
+        run_sync(
+            AuditLog.record_save_action(
+                Movie, user_id=user.id, new_row_id=response.json()[0]["id"]
+            )
+        )
        self.assertEqual(response.status_code, 201)

audit_log = AuditLog.select(AuditLog.action_type).first().run_sync()
@@ -76,7 +80,12 @@ def test_patch_audit_logs(self):

        response = client.patch(f"/{movie.id}/", json={"name": new_name})
        run_sync(
-            AuditLog.post_patch_action(Movie, row_id=movie.id, user_id=user.id)
+            AuditLog.record_patch_action(
+                Movie,
+                row_id=movie.id,
+                user_id=user.id,
+                changes_in_row={"name": new_name},
+            )
        )
        self.assertEqual(response.status_code, 200)

@@ -110,7 +119,7 @@ def test_delete_audit_logs(self):

        response = client.delete(f"/{movie.id}/")
        run_sync(
-            AuditLog.post_delete_action(
+            AuditLog.record_delete_action(
                Movie, row_id=movie.id, user_id=user.id
            )
        )
@@ -144,14 +153,19 @@ def test_clean_audit_logs(self):
        json = {"name": "Star Wars", "rating": 93}

        response = client.post("/", json=json)
-        run_sync(AuditLog.post_save_action(Movie, user_id=user.id))
-
+        run_sync(
+            AuditLog.record_save_action(
+                Movie, user_id=user.id, new_row_id=response.json()[0]["id"]
+            )
+        )
        self.assertEqual(response.status_code, 201)

        movie = Movie.select().first().run_sync()

        response = client.delete(f"/{movie['id']}/")
        run_sync(
-            AuditLog.post_delete_action(
+            AuditLog.record_delete_action(
                Movie, row_id=movie["id"], user_id=user.id
            )
        )