Skip to content

Commit

Permalink
Merge pull request #448 from opencybersecurityalliance/mapping
Browse files Browse the repository at this point in the history
Entity name/attribute mapping to/from OCSF; use env vars in config
  • Loading branch information
subbyte authored Jan 2, 2024
2 parents 1b47d2e + a09427d commit 3513047
Show file tree
Hide file tree
Showing 9 changed files with 711 additions and 9 deletions.
70 changes: 70 additions & 0 deletions packages-nextgen/kestrel_core/src/kestrel/config/kestrel.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
# syntax default values
language:
default_variable: "_"
default_sort_order: "desc"
default_datasource_schema: "stixshifter"
default_analytics_schema: "python"

# how a Kestrel session is executed
session:
cache_directory_prefix: "kestrel-session-" # under system temp directory
local_database_path: "local.db"
log_path: "session.log"
show_execution_summary: true

# whether/how to prefetch all records/observations for entities
prefetch:

# enable/disable prefetch for command
#
# If prefetch is enabled, Kestrel will send additional queries to the data
# source to search for related records regarding entities retrieved from the
# user-specified pattern, collecting more complete information (attributes,
# connections to other entities) of the entities from different records.
switch_per_command:
get: true
find: true

# declare the list of entity types to not prefetch
#
# This can be used when a user finds that prefetch hinders performance
# because of a large number of results for one or more generic types of
# entities. For example, the data source may have millions of records
# containing `C:\Windows\SYSTEM32\ntdll.dll`, touched by all Windows processes
# in a short amount of time. Executing a Kestrel command
# `f = FIND file LINKED p` will retrieve the file from a process and then
# start prefetch to gain information/connections of the file from all
# processes. Retrieving millions of records will likely result in a
# performance issue, so the user can put `file` in this list to disable
# prefetch for it.
excluded_entities:
-
# - file
# - user-account
# - x-oca-asset

# The detailed logic to identify the same process across different records is
# more complex than many data source query languages can express, so Kestrel
# retrieves candidate records that potentially belong to the same process and
# performs fine-grained process identification itself with these parameters.
process_identification:
pid_but_name_changed_time_begin_offset: -5 # seconds
pid_but_name_changed_time_end_offset: 5 # seconds
pid_and_name_time_begin_offset: -3600 # seconds
pid_and_name_time_end_offset: 3600 # seconds
pid_and_ppid_time_begin_offset: -3600 # seconds
pid_and_ppid_time_end_offset: 3600 # seconds
pid_and_name_and_ppid_time_begin_offset: -86400 # seconds
pid_and_name_and_ppid_time_end_offset: 86400 # seconds

# option when generating STIX query
stixquery:
timerange_start_offset: -300 # seconds
timerange_stop_offset: 300 # seconds
support_id: false # STIX 2.0 does not support unique ID

# debug options
debug:
env_var: "KESTREL_DEBUG" # debug mode if the environment variable exists
cache_directory_prefix: "kestrel-" # under system temp directory
session_exit_marker: "session.exited"
maximum_exited_session: 3
44 changes: 44 additions & 0 deletions packages-nextgen/kestrel_core/src/kestrel/config/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import os
import yaml
import pathlib
import logging

from kestrel.utils import update_nested_dict, load_data_file

CONFIG_DIR_DEFAULT = pathlib.Path.home() / ".config" / "kestrel"
CONFIG_PATH_DEFAULT = CONFIG_DIR_DEFAULT / "kestrel.yaml"
CONFIG_PATH_ENV_VAR = "KESTREL_CONFIG" # override CONFIG_PATH_DEFAULT if provided

_logger = logging.getLogger(__name__)


def load_default_config():
    """Load and parse the default Kestrel configuration.

    The text of ``kestrel.yaml`` is obtained through
    ``load_data_file("kestrel.config", "kestrel.yaml")``, environment-variable
    references in it are expanded with ``os.path.expandvars``, and the result
    is parsed with ``yaml.safe_load``.

    Returns:
        The object produced by ``yaml.safe_load`` for the expanded text.
    """
    _logger.debug("Loading default config file...")
    raw_yaml = load_data_file("kestrel.config", "kestrel.yaml")
    # Substitute $VAR / ${VAR} references before the YAML parser sees them.
    expanded_yaml = os.path.expandvars(raw_yaml)
    return yaml.safe_load(expanded_yaml)


def load_user_config(config_path_env_var, config_path_default):
    """Load the user's configuration file, if one is available.

    Args:
        config_path_env_var: Name of the environment variable that, when set,
            supplies the configuration file path.
        config_path_default: Path used when that environment variable is not
            set. May be a ``str`` or path-like object; a falsy value disables
            the lookup.

    Returns:
        The parsed configuration. An empty dict is returned when no path is
        configured, when the file does not exist, or when the file contains
        no YAML content (empty or comment-only).

    Raises:
        yaml.YAMLError: If the file exists but is not valid YAML.
    """
    config_path = os.getenv(config_path_env_var, config_path_default)
    # Check before expanding: os.path.expanduser() raises TypeError on None.
    if not config_path:
        return {}
    config_path = os.path.expanduser(config_path)

    try:
        with open(config_path, "r") as fp:
            _logger.debug(f"User configuration file found: {config_path}")
            content = fp.read()
    except FileNotFoundError:
        _logger.debug("User configuration file not exist.")
        return {}

    # yaml.safe_load() yields None for an empty document; normalize that to
    # an empty dict so callers can always merge the result into the defaults.
    return yaml.safe_load(os.path.expandvars(content)) or {}


def load_config():
    """Assemble the effective configuration from defaults and user settings.

    Loads the default configuration, then the user configuration located via
    ``CONFIG_PATH_ENV_VAR`` / ``CONFIG_PATH_DEFAULT``, and hands both to
    ``update_nested_dict``.

    Returns:
        Whatever ``update_nested_dict(default_config, user_config)`` returns.
    """
    defaults = load_default_config()
    overrides = load_user_config(CONFIG_PATH_ENV_VAR, CONFIG_PATH_DEFAULT)
    _logger.debug(f"User configuration loaded: {overrides}")
    _logger.debug("Updating default config with user config...")
    merged = update_nested_dict(defaults, overrides)
    return merged


# Script entry point: currently a placeholder that performs no action.
if __name__ == "__main__":
    ...
6 changes: 5 additions & 1 deletion packages-nextgen/kestrel_core/src/kestrel/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,13 @@ class InevaluableInstruction(KestrelError):
pass


# NOTE(review): judging by the name, this signals a failure while parsing a
# data-model mapping; no raise site is visible here -- confirm against callers.
class MappingParseError(KestrelError):
    pass


# NOTE(review): judging by the name, this signals that a requested interface
# could not be located; no raise site is visible here -- confirm against callers.
class InterfaceNotFound(KestrelError):
    pass


# NOTE(review): judging by the name, this signals that two interfaces share
# the same name; no raise site is visible here -- confirm against callers.
class InterfaceNameCollision(KestrelError):
    pass
    pass
Original file line number Diff line number Diff line change
@@ -0,0 +1,237 @@
process.command_line: process.cmd_line
process.end: process.end_time
process.entity_id: process.uid
process.executable: process.file.path
process.exit_code: process.exit_code
process.name: process.name
process.pid: process.pid
process.start: process.start_time
process.thread.id: process.tid
# process.args
# process.args_count
# process.entry_meta.type
# process.env_vars
# process.interactive
# process.same_as_process
# process.thread.capabilities.effective
# process.thread.capabilities.permitted
# process.thread.name
# process.title
# process.tty
# process.uptime
# process.vpid
# process.working_directory
file.accessed: file.accessed_time
file.attributes: file.attributes
file.created: file.created_time
file.ctime: file.modified_time
file.directory: file.parent_folder
file.gid: file.xattributes.primary_group
file.mime_type: file.mime_type
file.mode: file.mode
file.mtime: file.modified_time
file.name: file.name
file.owner: file.owner
file.path: file.path
file.size: file.size
file.target_path: file.xattributes.link_name
file.type: file.type
# file.device
# file.drive_letter
# file.extension
# file.fork_name
# file.inode
# file.uid
group.name: group.name
group.id: group.uid
# group.domain
client.bytes: traffic.bytes_out
client.domain: src_endpoint.domain
client.ip: src_endpoint.ip
client.mac: src_endpoint.mac
client.packets: traffic.packets_out
client.port: src_endpoint.port
# client.address
# client.nat.ip
# client.nat.port
# client.registered_domain
# client.subdomain
# client.top_level_domain
destination.bytes: traffic.bytes_in
destination.domain: dst_endpoint.domain
destination.ip: dst_endpoint.ip
destination.mac: dst_endpoint.mac
destination.packets: traffic.packets_in
destination.port: dst_endpoint.port
# destination.address
# destination.nat.ip
# destination.nat.port
# destination.registered_domain
# destination.subdomain
# destination.top_level_domain
server.bytes: traffic.bytes_in
server.domain: dst_endpoint.domain
server.ip: dst_endpoint.ip
server.mac: dst_endpoint.mac
server.packets: traffic.packets_in
server.port: dst_endpoint.port
# server.address
# server.nat.ip
# server.nat.port
# server.registered_domain
# server.subdomain
# server.top_level_domain
source.bytes: traffic.bytes_out
source.domain: src_endpoint.domain
source.ip: src_endpoint.ip
source.mac: src_endpoint.mac
source.packets: traffic.packets_out
source.port: src_endpoint.port
# source.address
# source.nat.ip
# source.nat.port
# source.registered_domain
# source.subdomain
# source.top_level_domain

# Network Activity [4001], HTTP Activity [4002], DNS Activity [4003], Email Activity [4009]
network.application: app_name
network.bytes: traffic.bytes
network.direction: connection_info.direction
network.iana_number: connection_info.protocol_num
network.packets: traffic.packets
network.protocol: connection_info.protocol_name
network.type: connection_info.protocol_ver_id
# network.community_id
# network.forwarded_ip
# network.inner
# network.name
# network.transport:
hash.md5: file.hashes[?algorithm_id == 1].value
hash.sha1: file.hashes[?algorithm_id == 2].value
hash.sha256: file.hashes[?algorithm_id == 3].value
hash.sha512: file.hashes[?algorithm_id == 4].value
hash.ssdeep: file.hashes[?algorithm_id == 5].value
hash.tlsh: file.hashes[?algorithm_id == 6].value
# hash.sha384
x509.not_after: certificate.expiration_time
x509.not_before: certificate.created_time
x509.serial_number: certificate.serial_number
x509.signature_algorithm: certificate.fingerprints.algorithm
x509.version_number: certificate.version
# x509.alternative_names
# x509.issuer.common_name: certificate.issuer
# x509.issuer.country: certificate.issuer
# x509.issuer.distinguished_name: certificate.issuer
# x509.issuer.locality: certificate.issuer
# x509.issuer.organization: certificate.issuer
# x509.issuer.organizational_unit: certificate.issuer
# x509.issuer.state_or_province: certificate.issuer
# x509.public_key_algorithm
# x509.public_key_curve
# x509.public_key_exponent
# x509.public_key_size
# x509.subject.common_name: certificate.subject
# x509.subject.country: certificate.subject
# x509.subject.distinguished_name: certificate.subject
# x509.subject.locality: certificate.subject
# x509.subject.organization: certificate.subject
# x509.subject.organizational_unit: certificate.subject
# x509.subject.state_or_province: certificate.subject
as.number: device.org.number
as.organization.name: device.org.name
geo.city_name: location.city
geo.continent_name: location.continent
geo.country_iso_code: location.country
geo.location: location.coordinates
geo.postal_code: location.postal_code
geo.region_iso_code: location.region
# geo.continent_code
# geo.country_name
# geo.name
# geo.region_name
# geo.timezone
user.domain: user.domain
user.email: user.email_addr
user.full_name: user.full_name
user.id: user.uid
user.name: user.name
# user.roles
# user.hash:

referenced_fields:
process.group:
ref: group
prefix: process
process.hash:
ref: hash
prefix: process
process.parent:
ref: process # ECS entity used for attribute mapping
prefix: process # OCSF Prefix
target_entity: parent_process # Updated OCSF entity name
process.user:
ref: user
prefix: process
# process.code_signature: code_signature
# process.entry_leader: process
# process.entry_leader.parent: process
# process.entry_leader.parent.session_leader: process
# process.entry_meta.source: source
# process.group_leader: process
# process.macho: macho
# process.parent.group_leader: process
# process.pe: pe
# process.previous: process
# process.real_group: group
# process.real_user: user
# process.saved_group: group
# process.saved_user: user
# process.session_leader: process
# process.session_leader.parent: process
# process.session_leader.parent.session_leader: process
# process.supplemental_groups: group
file.hash:
ref: hash
prefix: null
file.x509:
ref: x509
prefix: tls
# file.code_signature.*
# file.pe.*
client.as:
ref: as
prefix: null
client.geo:
ref: geo
prefix: src_endpoint
# client.user:
# ref: user
# prefix: src_endpoint
destination.as:
ref: as
prefix: null
destination.geo:
ref: geo
prefix: dst_endpoint
# destination.user:
# ref: user
# prefix: dst_endpoint
server.as:
ref: as
prefix: null
server.geo:
ref: geo
prefix: dst_endpoint
# server.user:
# ref: user
# prefix: dst_endpoint
source.as:
ref: as
prefix: null
source.geo:
ref: geo
prefix: src_endpoint
# source.user:
# ref: user
# prefix: src_endpoint
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
process: process
file: file
group: group
client: network_endpoint
destination: network_endpoint
server: network_endpoint
source: network_endpoint
network: network_activity
user: user
Loading

0 comments on commit 3513047

Please sign in to comment.