Skip to content

Commit

Permalink
feat: transforms aggregated completion "progress" events into a new f…
Browse files Browse the repository at this point in the history
…act (#1)

Materialized view shows the highest level of completion reached for each block + user
  • Loading branch information
pomegranited authored Jul 2, 2024
1 parent 1d6f350 commit d1368ad
Show file tree
Hide file tree
Showing 13 changed files with 261 additions and 0 deletions.
13 changes: 13 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
### DBT Completion Aggregator

Basic dbt package to transform completion and completion aggregator events.

Extends [aspects-dbt](https://github.com/openedx/aspects-dbt).

### DBT Resources:

- Learn more about dbt [in the docs](https://docs.getdbt.com/docs/introduction)
- Check out [Discourse](https://discourse.getdbt.com/) for commonly asked questions and answers
- Join the [chat](https://community.getdbt.com/) on Slack for live discussions and support
- Find [dbt events](https://events.getdbt.com) near you
- Check out [the blog](https://blog.getdbt.com/) for the latest news on dbt's development and best practices
Empty file added analyses/.gitkeep
Empty file.
27 changes: 27 additions & 0 deletions dbt_project.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@

# Name your project! Project names should contain only lowercase characters
# and underscores. A good package name should reflect your organization's
# name or the intended use of these models
name: 'dbt_completion_aggregator'
version: '1.0.0'
config-version: 2

# This setting configures which "profile" dbt uses for this project.
profile: 'aspects'

# directory which will store compiled SQL files
target-path: "target"

# These configurations specify where dbt should look for different types of files.
# The `model-paths` config, for example, states that models in this project can be
# found in the "models/" directory. You probably won't need to change these!
model-paths: ["models"]
analysis-paths: ["analyses"]
test-paths: ["tests"]
seed-paths: ["seeds"]
macro-paths: ["macros"]
snapshot-paths: ["snapshots"]

clean-targets: # directories to be removed by `dbt clean`
- "target"
- "dbt_packages"
Empty file added macros/.gitkeep
Empty file.
29 changes: 29 additions & 0 deletions models/completion_aggregator/aggregated_completion_events.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
{#
    Materialized view over the parsed xAPI events that keeps only
    "progressed" statements and extracts, from the raw event JSON, the
    reported progress value and the completion flag.
#}
{% set progressed_verb = "http://adlnet.gov/expapi/verbs/progressed" %}
{% set progress_path = '$.result.extensions."https://w3id.org/xapi/cmi5/result/extensions/progress"' %}
{% set completion_path = "$.result.completion" %}

{{
    config(
        materialized="materialized_view",
        engine=aspects.get_engine("ReplacingMergeTree()"),
        primary_key="(org, course_key, verb_id)",
        order_by="(org, course_key, verb_id, emission_time, actor_id, object_id, event_id)",
        partition_by="(toYYYYMM(emission_time))",
        ttl=env_var("ASPECTS_DATA_TTL_EXPRESSION", ""),
    )
}}

select
    event_id,
    CAST(emission_time, 'DateTime') as emission_time,
    actor_id,
    object_id,
    course_key,
    org,
    verb_id,
    {# JSON_VALUE output is kept as-is; downstream models do the numeric cast #}
    JSON_VALUE(event, '{{ progress_path }}') as progress_percent,
    JSON_VALUE(event, '{{ completion_path }}') as completed
from {{ ref("xapi_events_all_parsed") }}
where verb_id = '{{ progressed_verb }}'
66 changes: 66 additions & 0 deletions models/completion_aggregator/fact_aggregated_completions.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
{#
    Highest aggregated completion progress per learner and entity.

    Reads the "progressed" events from aggregated_completion_events, derives
    the entity id (course key or block id) from the xAPI object id, keeps the
    single best-progress row per (org, entity_id, actor_id) and enriches it
    with course, block and learner PII attributes.
#}
with
    completions as (
        select
            emission_time,
            org,
            course_key,
            actor_id,
            progress_percent,
            {# exposed because schema.yml documents and tests this column #}
            completed,
            {# object ids contain either '/course/<key>' or '/xblock/<id>' #}
            if(
                object_id like '%/course/%',
                splitByString('/course/', object_id)[-1],
                splitByString('/xblock/', object_id)[-1]
            ) as entity_id,
            {# progress_percent arrives as text on a 0-100 scale #}
            cast(progress_percent as Float) / 100 as scaled_progress,
            {#
                rn = 1 marks the row with the highest progress. emission_time
                breaks ties so the surviving row is deterministic: the first
                event at which that highest progress was reported.
            #}
            row_number() over (
                partition by org, entity_id, actor_id
                order by scaled_progress desc, emission_time asc
            ) as rn
        from {{ ref("aggregated_completion_events") }}
    )

select
    completions.emission_time as emission_time,
    completions.org as org,
    completions.course_key as course_key,
    courses.course_name as course_name,
    courses.course_run as course_run,
    completions.entity_id as entity_id,
    {# fall back to the course name when no block matched the entity id #}
    if(blocks.block_name != '', blocks.block_name, courses.course_name) as entity_name,
    if(
        blocks.block_name != '', blocks.display_name_with_location, null
    ) as entity_name_with_location,
    {# NOTE(review): same source column as entity_name_with_location - confirm intended #}
    blocks.display_name_with_location as section_subsection_name,
    completions.actor_id as actor_id,
    cast(completions.scaled_progress as Float) as scaled_progress,
    {#
        Bucket on the CTE value, qualified with the CTE name. The bare name
        can resolve to the Float32 output alias above, where 0.7 and 0.9 are
        stored slightly below the literals and would drop one bucket.
        Branches are evaluated top-down, so only lower bounds are needed.
    #}
    case
        when completions.scaled_progress >= 0.9
        then '90-100%'
        when completions.scaled_progress >= 0.8
        then '80-89%'
        when completions.scaled_progress >= 0.7
        then '70-79%'
        when completions.scaled_progress >= 0.6
        then '60-69%'
        when completions.scaled_progress >= 0.5
        then '50-59%'
        when completions.scaled_progress >= 0.4
        then '40-49%'
        when completions.scaled_progress >= 0.3
        then '30-39%'
        when completions.scaled_progress >= 0.2
        then '20-29%'
        when completions.scaled_progress >= 0.1
        then '10-19%'
        else '0-9%'
    end as completion_bucket,
    {# NOTE(review): passed through exactly as extracted upstream (JSON text) - confirm desired type #}
    completions.completed as completed,
    users.username as username,
    users.name as name,
    users.email as email
from completions
inner join
    {{ ref("course_names") }} courses on completions.course_key = courses.course_key
left join
    {{ ref("course_block_names") }} blocks on completions.entity_id = blocks.location
left join
    {{ ref("dim_user_pii") }} users
    on toUUID(completions.actor_id) = users.external_user_id
where completions.rn = 1
92 changes: 92 additions & 0 deletions models/completion_aggregator/schema.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
version: 2

models:
- name: fact_aggregated_completions
database: "{{ env_var('DBT_PROFILE_TARGET_DATABASE', 'reporting') }}"
description: "One record per learner and entity (course or block), holding the highest aggregated completion progress reported for that learner"
columns:
- name: emission_time
description: "Timestamp, to the second, of when this event was emitted"
data_type: DateTime64(3)
- name: org
data_type: String
description: "The organization that the course belongs to"
- name: course_key
data_type: String
description: "The course key for the course"
- name: course_name
data_type: String
description: "The name of the course"
- name: course_run
data_type: String
description: "The course run for the course"
- name: entity_id
description: "The block ID or course key of the entity whose completion is tracked"
data_type: String
- name: entity_name
data_type: String
description: "The name of the entity whose completion is tracked (course or block)"
- name: entity_name_with_location
data_type: Nullable(String)
description: "The entity's display name with section, subsection, and unit prepended to the name. This provides additional context when looking at block names and can help data consumers understand which block they are analyzing"
- name: section_subsection_name
data_type: Nullable(String)
description: "The name of the section this subsection belongs to, with section_number prepended"
- name: actor_id
data_type: String
description: "The xAPI actor identifier"
- name: scaled_progress
description: "A ratio between 0 and 1, inclusive, of the learner's progress"
data_type: Float32
- name: completion_bucket
description: "A displayable value of progress sorted into 10% buckets. Useful for grouping progress together to show high-level learner performance"
data_type: String
- name: completed
  description: "Flag indicating whether the object has been fully completed"
  data_type: Bool
  data_tests:
    # is_completed compares progress against 100, but scaled_progress is a
    # 0-1 ratio, so it is scaled to a percentage before the comparison.
    - is_completed:
        progress_field: "scaled_progress * 100"
- name: username
data_type: String
description: "The username of the learner"
- name: name
data_type: String
description: "The full name of the learner"
- name: email
data_type: String
description: "The email address of the learner"

- name: aggregated_completion_events
description: "A materialized view for xAPI events related to aggregated completions"
columns:
- name: event_id
data_type: uuid
description: "The unique identifier for the event"
- name: emission_time
data_type: datetime
description: "The time the event was emitted"
- name: actor_id
data_type: string
description: "The xAPI actor identifier"
- name: object_id
data_type: string
description: "The xAPI object identifier"
- name: course_key
data_type: string
description: "The course identifier"
- name: org
data_type: string
description: "The organization that the course belongs to"
- name: verb_id
data_type: string
description: "The xAPI verb identifier"
- name: progress_percent
data_type: string
description: "The percentage of the xAPI object completed"
- name: completed
description: "Flag indicating whether the object has been fully completed"
data_type: Bool
data_tests:
- is_completed:
progress_field: "progress_percent"
3 changes: 3 additions & 0 deletions packages.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
packages:
- git: "https://github.com/openedx/aspects-dbt.git"
revision: v3.29.0
Empty file added seeds/.gitkeep
Empty file.
Empty file added snapshots/.gitkeep
Empty file.
Empty file added tests/.gitkeep
Empty file.
4 changes: 4 additions & 0 deletions tests/generic/test_completion_uniqueness.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{#
    Fails when fact_aggregated_completions holds more than one row for the
    same (org, entity_id, actor_id); each offending key is returned together
    with its row count.
    NOTE(review): this query is not wrapped in a test block although it sits
    under tests/generic - confirm dbt actually runs it from this location.
#}
select
    org,
    entity_id,
    actor_id,
    count(*) as num_rows
from {{ ref("fact_aggregated_completions") }}
group by
    org,
    entity_id,
    actor_id
having num_rows > 1
27 changes: 27 additions & 0 deletions tests/generic/test_is_completed.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
{% test is_completed(model, progress_field, column_name="completed", full_progress=100) %}
{#
    Generic test: a row flagged as completed must report full progress.

    Arguments:
      model          relation under test
      progress_field column or SQL expression holding the progress value
      column_name    completion flag column; dbt passes it automatically when
                     the test is attached to a column (defaults to "completed")
      full_progress  value that represents 100% progress (defaults to 100);
                     pass 1 for ratio-scaled fields, or scale the expression
                     given as progress_field

    Returns the offending rows, so the test passes when the result is empty.
#}

with
    validation as (
        select
            {{ column_name }} as completed,
            {{ progress_field }} as progress
        from {{ model }}
    ),

    validation_errors as (
        select completed, progress
        from validation
        {# if this is true, then progress isn't being captured correctly #}
        where
            {# the flag may be a Bool/integer or the JSON text 'true' #}
            toString(completed) in ('true', '1')
            {# progress may be numeric or numeric text; unparsable values yield NULL and are not reported #}
            and toFloat64OrNull(toString(progress)) < {{ full_progress }}
    )

select completed, progress
from validation_errors

{% endtest %}

0 comments on commit d1368ad

Please sign in to comment.