-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: transforms aggregated completion "progress" events into a new fact #1
Changes from all commits
181112c
e4c3862
ebfc41d
5cf7331
66e4d0f
9e223e5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
### DBT Completion Aggregator | ||
|
||
Basic dbt package to transform completion and completion aggregator events. | ||
|
||
Extends [aspects-dbt](https://github.com/openedx/aspects-dbt). | ||
|
||
### DBT Resources: | ||
|
||
- Learn more about dbt [in the docs](https://docs.getdbt.com/docs/introduction) | ||
- Check out [Discourse](https://discourse.getdbt.com/) for commonly asked questions and answers | ||
- Join the [chat](https://community.getdbt.com/) on Slack for live discussions and support | ||
- Find [dbt events](https://events.getdbt.com) near you | ||
- Check out [the blog](https://blog.getdbt.com/) for the latest news on dbt's development and best practices |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
|
||
# Package identity. The name should use only lowercase characters and
# underscores and reflect the organization or the purpose of these models.
name: "dbt_completion_aggregator"
version: "1.0.0"
config-version: 2

# Connection profile dbt uses when running this project.
profile: "aspects"

# Directory that receives the compiled SQL files.
target-path: "target"

# Where dbt looks for each kind of resource. These are the conventional
# locations and rarely need to change.
model-paths:
  - "models"
analysis-paths:
  - "analyses"
test-paths:
  - "tests"
seed-paths:
  - "seeds"
macro-paths:
  - "macros"
snapshot-paths:
  - "snapshots"

# Directories removed by `dbt clean`.
clean-targets:
  - "target"
  - "dbt_packages"
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
{#
    Materialized view over xapi_events_all_parsed that keeps only xAPI
    "progressed" statements and pulls the progress percentage and the
    completion flag out of the raw event JSON.

    The get_engine macro is qualified with the `aspects.` package prefix
    because it is defined in the aspects-dbt package, not in this project.
#}
{{
    config(
        materialized="materialized_view",
        engine=aspects.get_engine("ReplacingMergeTree()"),
        primary_key="(org, course_key, verb_id)",
        order_by="(org, course_key, verb_id, emission_time, actor_id, object_id, event_id)",
        partition_by="(toYYYYMM(emission_time))",
        ttl=env_var("ASPECTS_DATA_TTL_EXPRESSION", ""),
    )
}}

select
    event_id,
    cast(emission_time, 'DateTime') as emission_time,
    actor_id,
    object_id,
    course_key,
    org,
    verb_id,
    -- Kept as the text JSON_VALUE returns; consumers convert it to a number.
    JSON_VALUE(
        event,
        '$.result.extensions."https://w3id.org/xapi/cmi5/result/extensions/progress"'
    ) as progress_percent,
    -- JSON_VALUE yields text (and '' when the path is absent), so compare it
    -- to the textual truthy values to expose the documented Bool column.
    cast(JSON_VALUE(event, '$.result.completion') in ('true', '1'), 'Bool') as completed
from {{ ref("xapi_events_all_parsed") }}
where verb_id = 'http://adlnet.gov/expapi/verbs/progressed'
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
-- One row per (org, entity, learner): the event carrying the learner's highest
-- reported progress for a course or block, enriched with course, block and
-- learner PII attributes.
with
    completions as (
        select
            emission_time,
            org,
            course_key,
            actor_id,
            progress_percent,
            completed,
            -- The entity id is whatever follows '/course/' or '/xblock/' in
            -- the xAPI object id.
            if(
                object_id like '%/course/%',
                splitByString('/course/', object_id)[-1],
                splitByString('/xblock/', object_id)[-1]
            ) as entity_id,
            -- progress_percent is text and is '' when the event carries no
            -- progress extension; treat that as 0 instead of failing the query.
            toFloat32OrZero(progress_percent) / 100 as scaled_progress,
            -- emission_time breaks ties so the selected row is deterministic.
            row_number() over (
                partition by org, entity_id, actor_id
                order by scaled_progress desc, emission_time desc
            ) as rn
        from {{ ref("aggregated_completion_events") }}
    )

select
    completions.emission_time as emission_time,
    completions.org as org,
    completions.course_key as course_key,
    courses.course_name as course_name,
    courses.course_run as course_run,
    completions.entity_id as entity_id,
    if(blocks.block_name != '', blocks.block_name, courses.course_name) as entity_name,
    if(
        blocks.block_name != '', blocks.display_name_with_location, null
    ) as entity_name_with_location,
    blocks.display_name_with_location as section_subsection_name,
    completions.actor_id as actor_id,
    cast(completions.scaled_progress as Float) as scaled_progress,
    -- Bucket on the CTE column, not on the Float32 output alias of the same
    -- name: 0.9 and 0.7 round down in Float32 and would land one bucket low.
    -- Branches are evaluated in order, so only the lower bound is needed.
    case
        when completions.scaled_progress >= 0.9
        then '90-100%'
        when completions.scaled_progress >= 0.8
        then '80-89%'
        when completions.scaled_progress >= 0.7
        then '70-79%'
        when completions.scaled_progress >= 0.6
        then '60-69%'
        when completions.scaled_progress >= 0.5
        then '50-59%'
        when completions.scaled_progress >= 0.4
        then '40-49%'
        when completions.scaled_progress >= 0.3
        then '30-39%'
        when completions.scaled_progress >= 0.2
        then '20-29%'
        when completions.scaled_progress >= 0.1
        then '10-19%'
        else '0-9%'
    end as completion_bucket,
    -- Documented (and tested) in the schema file, so it must be exposed here.
    completions.completed as completed,
    users.username as username,
    users.name as name,
    users.email as email
from completions
inner join
    {{ ref("course_names") }} courses on completions.course_key = courses.course_key
left join
    {{ ref("course_block_names") }} blocks on completions.entity_id = blocks.location
left join
    {{ ref("dim_user_pii") }} users
    on toUUID(completions.actor_id) = users.external_user_id
where completions.rn = 1
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
version: 2 | ||
|
||
models: | ||
- name: fact_aggregated_completions | ||
database: "{{ env_var('DBT_PROFILE_TARGET_DATABASE', 'reporting') }}" | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
I'm seeing the same issue you are and I think this is your problem. Since ClickHouse doesn't have a concept of […] FWIW I do also see this successfully being created in a new […]
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Aspects v1.0.2 solved an issue in which the environment variable for the reporting schema was not correctly set.
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
That's exactly what happened! I ended up deleting the […]
||
description: "One record per learner and entity (course or block), holding the highest aggregated completion progress reported for that pair"
columns:
  - name: emission_time
    description: "Timestamp, to the second, of when this event was emitted"
    data_type: DateTime
  - name: org
    description: "The organization that the course belongs to"
    data_type: String
  - name: course_key
    description: "The course key for the course"
    data_type: String
  - name: course_name
    description: "The name of the course"
    data_type: String
  - name: course_run
    description: "The course run for the course"
    data_type: String
  - name: entity_id
    description: "The block ID or course key of the entity whose completion is reported"
    data_type: String
  - name: entity_name
    description: "The name of the entity (the block name, or the course name when no block name is found)"
    data_type: String
  - name: entity_name_with_location
    description: "The entity's display name with section, subsection, and unit prepended to the name. This provides additional context when looking at block names and can help data consumers understand which block they are analyzing. Null when no block name is found"
    data_type: Nullable(String)
  - name: section_subsection_name
    description: "The block's display name with location, taken as-is from course_block_names"
    data_type: Nullable(String)
  - name: actor_id
    description: "The xAPI actor identifier"
    data_type: String
  - name: scaled_progress
    description: "A ratio between 0 and 1, inclusive, of the learner's progress"
    data_type: Float32
  - name: completion_bucket
    description: "A displayable value of progress sorted into 10% buckets. Useful for grouping progress together to show high-level learner performance"
    data_type: String
  - name: completed
    description: "Flag indicating whether the object has been fully completed"
    data_type: Bool
    data_tests:
      - is_completed:
          # The test compares progress against 100, so rescale the 0-1 ratio.
          progress_field: "scaled_progress * 100"
  - name: username
    description: "The username of the learner"
    data_type: String
  - name: name
    description: "The full name of the learner"
    data_type: String
  - name: email
    description: "The email address of the learner"
    data_type: String
|
||
- name: aggregated_completion_events
  description: "A materialized view for xAPI events related to aggregated completions"
  columns:
    # data_type values use ClickHouse's case-sensitive type names.
    - name: event_id
      description: "The unique identifier for the event"
      data_type: UUID
    - name: emission_time
      description: "The time the event was emitted"
      data_type: DateTime
    - name: actor_id
      description: "The xAPI actor identifier"
      data_type: String
    - name: object_id
      description: "The xAPI object identifier"
      data_type: String
    - name: course_key
      description: "The course identifier"
      data_type: String
    - name: org
      description: "The organization that the course belongs to"
      data_type: String
    - name: verb_id
      description: "The xAPI verb identifier"
      data_type: String
    - name: progress_percent
      description: "The percentage (0-100) of the xAPI object completed, kept as the text extracted from the event"
      data_type: String
    - name: completed
      description: "Flag indicating whether the object has been fully completed"
      data_type: Bool
      data_tests:
        - is_completed:
            progress_field: "progress_percent"
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
# aspects-dbt supplies the upstream models and macros this package extends.
packages:
  - git: "https://github.com/openedx/aspects-dbt.git"
    revision: "v3.29.0"
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
-- Singular test: returns every (org, entity_id, actor_id) combination that
-- appears more than once in fact_aggregated_completions. Any returned row is
-- a failure.
select
    org,
    entity_id,
    actor_id,
    count(*) as num_rows
from {{ ref("fact_aggregated_completions") }}
group by
    org,
    entity_id,
    actor_id
having count(*) > 1
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
{#
    Generic test: a row flagged as completed must report full (100) progress.

    Arguments:
      model          - relation under test (supplied by dbt).
      progress_field - column or SQL expression yielding progress on a 0-100
                       scale; numeric or numeric text are both accepted.
      column_name    - completion flag column. dbt supplies it when the test
                       is attached to a column; defaults to "completed".

    Returns the offending rows, so the test passes when the result is empty.
#}
{% test is_completed(model, progress_field, column_name="completed") %}

with
    validation as (

        select
            {{ column_name }} as completed,
            -- Normalise to a number whether the source is numeric or text;
            -- unparseable values become NULL and are not reported.
            toFloat64OrNull(toString({{ progress_field }})) as progress

        from {{ model }}

    ),

    validation_errors as (

        select completed, progress

        from validation
        -- if this is true, then progress isn't being captured correctly
        -- (the flag is matched as text so Bool, 0/1 and 'true' all work)
        where toString(completed) in ('true', '1') and progress < 100

    )

select completed, progress
from validation_errors

{% endtest %}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@bmtcril FYI I also had to qualify this Aspects `get_engine` macro with the `aspects.` prefix. Will note this in a "troubleshooting" docs PR, because this was difficult to debug :)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh nice, good find 👍