Skip to content

Commit

Permalink
Merge pull request #517 from opencybersecurityalliance/config-cleaning
Browse files Browse the repository at this point in the history
entity identifier config and multiple fixes
  • Loading branch information
subbyte authored May 7, 2024
2 parents 1624fab + de866d7 commit 1255b9f
Show file tree
Hide file tree
Showing 13 changed files with 158 additions and 123 deletions.
18 changes: 9 additions & 9 deletions .github/workflows/code-coverage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,21 +31,21 @@ jobs:
run: pip install pytest pytest-cov
- name: Install kestrel [all packages]
run: make install
- name: Coverage for kestrel_core and kestrel_datasource_stixbundle
- name: Coverage for kestrel_core
working-directory: ./packages/kestrel_core
run: pytest -vv --cov-report=xml --cov=kestrel --cov=kestrel_datasource_stixbundle
- name: Coverage for kestrel_datasource_stixshifter
working-directory: ./packages/kestrel_datasource_stixshifter
run: pytest -vv --cov-report=xml --cov=kestrel_datasource_stixshifter
- name: Coverage for kestrel_analytics_python
working-directory: ./packages/kestrel_analytics_python
run: pytest -vv --cov-report=xml --cov=kestrel_analytics_python
run: pytest -vv --cov-report=xml --cov=kestrel
- name: Coverage for kestrel_interface_opensearch
working-directory: ./packages/kestrel_interface_opensearch
run: pytest -vv --cov-report=xml --cov=kestrel_interface_opensearch
- name: Coverage for kestrel_interface_sqlalchemy
working-directory: ./packages/kestrel_interface_sqlalchemy
run: pytest -vv --cov-report=xml --cov=kestrel_interface_sqlalchemy
- name: Coverage for kestrel_jupyter
working-directory: ./packages/kestrel_jupyter
run: pytest -vv --cov-report=xml $(ls src | grep -v '.egg-info' | xargs | sed -r 's/^| / --cov=/g')
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v3
with:
fail_ci_if_error: false
files: ./packages/kestrel_core/coverage.xml,./packages/kestrel_datasource_stixshifter/coverage.xml,./packages/kestrel_analytics_python/coverage.xml,./packages/kestrel_jupyter/coverage.xml
files: ./packages/kestrel_core/coverage.xml,./packages/kestrel_interface_opensearch/coverage.xml,./packages/kestrel_interface_sqlalchemy/coverage.xml,./packages/kestrel_jupyter/coverage.xml
verbose: true
18 changes: 5 additions & 13 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,23 +5,15 @@ kestrel_core:
cd packages/kestrel_core; pip install .

## Install OpenSearch interface package
kestrel_datasource_stixbundle: kestrel_core
cd packages/kestrel_datasource_stixbundle; pip install .
kestrel_interface_opensearch: kestrel_core
cd packages/kestrel_interface_opensearch; pip install .

## Install SQLAlchemy interface package
kestrel_datasource_stixshifter: kestrel_core
cd packages/kestrel_datasource_stixshifter; pip install .

## Install docker analytics
kestrel_analytics_docker: kestrel_core
cd packages/kestrel_analytics_docker; pip install .

## Install python analytics
kestrel_analytics_python: kestrel_core
cd packages/kestrel_analytics_python; pip install .
kestrel_interface_sqlalchemy: kestrel_core
cd packages/kestrel_interface_sqlalchemy; pip install .

## Install Kestrel kernel for Jupyter
kestrel_jupyter: kestrel_datasource_stixbundle kestrel_datasource_stixshifter kestrel_analytics_docker kestrel_analytics_python
kestrel_jupyter: kestrel_interface_opensearch kestrel_interface_sqlalchemy
cd packages/kestrel_jupyter; pip install .; kestrel_jupyter_setup

## Install Kestrel kernel for Jupyter
Expand Down
82 changes: 19 additions & 63 deletions packages/kestrel_core/src/kestrel/config/kestrel.yaml
Original file line number Diff line number Diff line change
@@ -1,70 +1,26 @@
# syntax default values
language:
default_variable: "_"
default_sort_order: "desc"
default_datasource_schema: "stixshifter"
default_analytics_schema: "python"

# how a Kestrel session is executed
session:
cache_directory_prefix: "kestrel-session-" # under system temp directory
local_database_path: "local.db"
log_path: "session.log"
show_execution_summary: true

# whether/how to prefetch all records/observations for entities
prefetch:

# enable/disable prefetch for command
#
# If prefetch is enabled, Kestrel will send additional queries to the data
# source to search for related records regarding entities retrieved from the
# user-specified pattern, collecting more complete information (attributes,
# connections to other entities) of the entities from different records.
switch_per_command:
get: true
find: true

# declare the list of entity types to not prefetch
#
# This can be used when a user finds that prefetch hinders performance with
# a large number of results for one or more generic types of entities. For
# example, the data source may have millions of records containing
# `C:\Windows\SYSTEM32\ntdll.dll` touched by all Windows processes in a short
# amount of time. Executing a Kestrel command `f = FIND file LINKED p` will
# retrieve the file from a process and then start prefetch to gain
# information/connections of the file from all processes. Retrieval of
# millions of records will likely result in a performance issue, thus the user
# can put `file` in this list to disable prefetch for it.
excluded_entities:
-
# - file
# - user-account
# - x-oca-asset

# Detailed logic to identify the same process from different records is more
# complex than many data source query languages can express, so Kestrel
# retrieves potential same-process candidate records and performs fine-grained
# process identification in Kestrel with these parameters.
process_identification:
pid_but_name_changed_time_begin_offset: -5 # seconds
pid_but_name_changed_time_end_offset: 5 # seconds
pid_and_name_time_begin_offset: -3600 # seconds
pid_and_name_time_end_offset: 3600 # seconds
pid_and_ppid_time_begin_offset: -3600 # seconds
pid_and_ppid_time_end_offset: 3600 # seconds
pid_and_name_and_ppid_time_begin_offset: -86400 # seconds
pid_and_name_and_ppid_time_end_offset: 86400 # seconds

# option when generating STIX query
stixquery:
timerange_start_offset: -300 # seconds
timerange_stop_offset: 300 # seconds
support_id: false # STIX 2.0 does not support unique ID

# debug options
debug:
env_var: "KESTREL_DEBUG" # debug mode if the environment variable exists
cache_directory_prefix: "kestrel-" # under system temp directory
session_exit_marker: "session.exited"
maximum_exited_session: 3
cache_directory_path: "~/kestrel-debug-session" # put in user's home directory by default

# default identifier attribute(s) of an entity across all datasource interfaces
# if multiple attributes are specified, a logical AND is applied between them
# each datasource interface config can have the same section to override this
entity_identifier:
file: "hashes[?algorithm_id == 3]" # sha256
group: uid
process: uid
endpoint: ip
device: ip
src_endpoint:
- ip
- port
dst_endpoint:
- ip
- port
certificate: serial_number
user: uid
25 changes: 24 additions & 1 deletion packages/kestrel_core/src/kestrel/config/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from typing import Mapping, Union

from kestrel.utils import update_nested_dict, load_data_file
from kestrel.exceptions import InvalidYamlInConfig

CONFIG_DIR_DEFAULT = Path.home() / ".config" / "kestrel"
CONFIG_PATH_DEFAULT = CONFIG_DIR_DEFAULT / "kestrel.yaml"
Expand All @@ -14,6 +15,27 @@
_logger = logging.getLogger(__name__)


@typechecked
def load_leaf_yaml(config: Mapping, path_dir: str) -> Mapping:
    """Return a copy of ``config`` with ``*.yaml`` string leaves loaded inline.

    The mapping is walked recursively. Every string value that ends with
    ``.yaml`` is treated as a path to a YAML file and is replaced by the
    parsed content of that file; all other values are copied as they are.

    Parameters:
        config: the (possibly nested) configuration mapping to process.
        path_dir: directory used to resolve relative ``.yaml`` paths.

    Returns:
        A new dict mirroring ``config`` with the referenced files loaded.

    Raises:
        InvalidYamlInConfig: if a referenced file cannot be opened, decoded,
            or parsed as YAML; the underlying error is chained as the cause.
    """
    new = {}
    for k, v in config.items():
        if isinstance(v, Mapping):
            new[k] = load_leaf_yaml(v, path_dir)
        elif isinstance(v, str) and v.endswith(".yaml"):
            # absolute paths are used as given; relative ones are resolved
            # against the directory passed in by the caller
            yaml_path = v if os.path.isabs(v) else os.path.join(path_dir, v)
            try:
                with open(yaml_path, "r") as fp:
                    new[k] = yaml.safe_load(fp.read())
            except (OSError, UnicodeDecodeError, yaml.YAMLError) as e:
                # only translate load failures; never swallow
                # KeyboardInterrupt/SystemExit as a bare `except:` would
                raise InvalidYamlInConfig(v) from e
        else:
            new[k] = v
    return new


@typechecked
def load_default_config() -> Mapping:
_logger.debug(f"Loading default config file...")
Expand All @@ -36,13 +58,14 @@ def load_user_config(
with open(config_path, "r") as fp:
_logger.debug(f"User configuration file found: {config_path}")
config = yaml.safe_load(os.path.expandvars(fp.read()))
config = load_leaf_yaml(config, os.path.dirname(config_path))
except FileNotFoundError:
_logger.debug(f"User configuration file not exist.")
return config


@typechecked
def load_config() -> Mapping:
def load_kestrel_config() -> Mapping:
config_default = load_default_config()
config_user = load_user_config(CONFIG_PATH_ENV_VAR, CONFIG_PATH_DEFAULT)
_logger.debug(f"User configuration loaded: {config_user}")
Expand Down
4 changes: 4 additions & 0 deletions packages/kestrel_core/src/kestrel/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@ class InvalidDataSource(KestrelError):
pass


class InvalidYamlInConfig(KestrelError):
    """Raised when a ``.yaml`` file referenced by a config value fails to load."""

    pass


class VariableNotFound(KestrelError):
pass

Expand Down
6 changes: 2 additions & 4 deletions packages/kestrel_core/src/kestrel/frontend/compile.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Lark Transformer

import logging
from datetime import datetime, timedelta
from datetime import datetime, timedelta, timezone
from functools import reduce

from dateutil.parser import parse as to_datetime
Expand Down Expand Up @@ -158,14 +158,12 @@ def _add_reference_branches_for_filter(graph: IRGraph, filter_node: Filter):
class _KestrelT(Transformer):
def __init__(
self,
default_variable=DEFAULT_VARIABLE,
default_sort_order=DEFAULT_SORT_ORDER,
token_prefix="",
entity_map={},
property_map={},
):
# token_prefix is the modification by Lark when using `merge_transformers()`
self.default_variable = default_variable
self.default_sort_order = default_sort_order
self.token_prefix = token_prefix
self.entity_map = entity_map
Expand Down Expand Up @@ -352,7 +350,7 @@ def timespan_relative(self, args):
delta = timedelta(minutes=num)
elif unit == "SECOND":
delta = timedelta(seconds=num)
stop = datetime.utcnow()
stop = datetime.now(timezone.utc)
start = stop - delta
return TimeRange(start, stop)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -147,11 +147,6 @@ endpoint:
- source.mac
- server.mac
- destination.mac
port:
- client.port
- source.port
- server.port
- destination.port


# dst_endpoint: see https://schema.ocsf.io/1.1.0/objects/endpoint
Expand Down
31 changes: 26 additions & 5 deletions packages/kestrel_core/tests/test_config.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import kestrel.config.utils as cfg
from kestrel.config.utils import load_kestrel_config
import os


Expand All @@ -15,7 +15,7 @@ def test_env_vars_in_config():

with open(os.getenv("KESTREL_CONFIG"), "w") as fp:
fp.write(test_config)
config = cfg.load_config()
config = load_kestrel_config()
assert config["credentials"]["username"] == "test-user"
assert config["credentials"]["password"] == "test-password"

Expand All @@ -35,7 +35,7 @@ def test_env_vars_in_config_overwrite():
os.environ["KESTREL_CACHE_DIRECTORY_PREFIX"] = "Kestrel2.0-"
with open(os.getenv("KESTREL_CONFIG"), "w") as fp:
fp.write(test_config)
config = cfg.load_config()
config = load_kestrel_config()
assert config["credentials"]["username"] == "test-user"
assert config["credentials"]["password"] == "test-password"
assert config["debug"]["cache_directory_prefix"] == "Kestrel2.0-"
Expand All @@ -54,7 +54,28 @@ def test_empty_env_var_in_config():
os.environ["KESTREL_CACHE_DIRECTORY_PREFIX"] = "Kestrel2.0-"
with open(os.getenv("KESTREL_CONFIG"), "w") as fp:
fp.write(test_config)
config = cfg.load_config()
config = load_kestrel_config()
assert config["credentials"]["username"] == "test-user"
assert config["credentials"]["password"] == "test-password"
assert config["debug"]["cache_directory_prefix"] == "$I_DONT_EXIST"
assert config["debug"]["cache_directory_prefix"] == "$I_DONT_EXIST"

def test_yaml_load_in_config(tmp_path):
test_config = """---
credentials:
username: ${TEST_USER}
password: ${TEST_PASSWORD}
loadtest:
xyz:
abc: abc.yaml
"""
os.environ["TEST_USER"] = "test-user"
os.environ["TEST_PASSWORD"] = "test-password"
os.environ["KESTREL_CONFIG"] = os.path.join(tmp_path, "config.yaml")
with open(os.getenv("KESTREL_CONFIG"), "w") as fp:
fp.write(test_config)
with open(os.path.join(tmp_path, "abc.yaml"), "w") as fp:
fp.write("test: fake-value")
config = load_kestrel_config()
assert config["credentials"]["username"] == "test-user"
assert config["credentials"]["password"] == "test-password"
assert config["loadtest"]["xyz"]["abc"]["test"] == "fake-value"
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
import logging
from dataclasses import dataclass, field
from typing import Dict, Mapping, Optional
from dataclasses import dataclass
from typing import Dict, Optional

import yaml
from mashumaro.mixins.json import DataClassJSONMixin

from kestrel.config.utils import (
CONFIG_DIR_DEFAULT,
load_user_config,
load_kestrel_config,
)
from kestrel.exceptions import InterfaceNotConfigured
from kestrel.mapping.data_model import load_default_mapping
Expand Down Expand Up @@ -41,14 +41,11 @@ class DataSource(DataClassJSONMixin):
index_pattern: str
timestamp: str
timestamp_format: str
data_model_mapping: Optional[str] = None # Filename for mapping
data_model_map: Mapping = field(default_factory=dict)
data_model_map: Optional[Dict] = None
entity_identifier: Optional[Dict] = None

def __post_init__(self):
if self.data_model_mapping:
with open(self.data_model_mapping, "r") as fp:
self.data_model_map = yaml.safe_load(fp)
else:
if not self.data_model_map:
# Default to the built-in ECS mapping
self.data_model_map = load_default_mapping("ecs")

Expand All @@ -65,6 +62,16 @@ def __post_init__(self):

def load_config():
try:
return Config(**load_user_config(PROFILE_PATH_ENV_VAR, PROFILE_PATH_DEFAULT))
interface_config = Config(
**load_user_config(PROFILE_PATH_ENV_VAR, PROFILE_PATH_DEFAULT)
)

# load default entity identifier from main Kestrel config
kestrel_config = load_kestrel_config()
for ds in interface_config.datasources.values():
if not ds.entity_identifier:
ds.entity_identifier = kestrel_config["entity_identifier"]

return interface_config
except TypeError:
raise InterfaceNotConfigured()
4 changes: 3 additions & 1 deletion packages/kestrel_interface_opensearch/tests/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def test_load_config(tmp_path):
"index_pattern": "logs-*",
"timestamp": "@timestamp",
"timestamp_format": "%Y-%m-%d %H:%M:%S.%f",
"data_model_mapping": str(tmp_path / "mapping.yaml")
"data_model_map": "mapping.yaml"
}
}
}
Expand All @@ -51,3 +51,5 @@ def test_load_config(tmp_path):
assert conn.url == config["connections"]["localhost"]["url"]
assert read_config.connections["localhost"].url == config["connections"]["localhost"]["url"]
assert read_config.datasources["some_ds"].index_pattern == config["datasources"]["some_ds"]["index_pattern"]
assert read_config.datasources["some_ds"].data_model_map["some.field"] == "other.field"
assert read_config.datasources["some_ds"].entity_identifier["process"] == "uid"
Loading

0 comments on commit 1255b9f

Please sign in to comment.