diff --git a/R/geom_dag.R b/R/geom_dag.R index b4a77310..fe4c6fb3 100644 --- a/R/geom_dag.R +++ b/R/geom_dag.R @@ -653,7 +653,7 @@ geom_dag_collider_edges <- function(mapping = NULL, data = NULL, params = list( size = size, arrow = arrow, - strength = curvature, + curvature = curvature, angle = angle, ncp = ncp, lineend = lineend, diff --git a/_pkgdown.yml b/_pkgdown.yml index e11fcbc8..242d1e17 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -53,12 +53,14 @@ navbar: - icon: fa-home fa-lg href: index.html - text: Reference - href: /reference/index.html + href: reference/index.html - text: Articles menu: - text: An Introduction to ggdag - href: /articles/intro-to-ggdag.html + href: articles/intro-to-ggdag.html - text: An Introduction to DAGs - href: /articles/intro-to-dags.html + href: articles/intro-to-dags.html - text: Common Structures of Bias - href: /articles/bias-structures.html + href: articles/bias-structures.html + - text: News + href: news/index.html diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html index 2e5fc589..d4c3750a 100644 --- a/docs/LICENSE-text.html +++ b/docs/LICENSE-text.html @@ -1,6 +1,6 @@ - +
@@ -9,17 +9,17 @@We have already seen a few examples of selection bias, but let’s consider a couple more that are potential pitfalls in common design types. Let’s say we’re doing a case-control study and want to assess the effect of smoking on glioma, a type of brain cancer. We have a group of glioma patients at a hospital and want to compare them to a group of controls, so we pick people in the hospital with a broken bone, since that seems to have nothing to do with brain cancer. However, perhaps there is some unknown confounder of smoking and being in the hospital with a broken bone, such as being prone to reckless behavior. In the general population, there is no causal effect of smoking on glioma, but in our case, we’re selecting on people who have been hospitalized, which opens up a back-door path:
-coords <- tibble::tribble(
- ~name, ~x, ~y,
- "glioma", 1, 2,
- "hospitalized", 2, 3,
- "broken_bone", 3, 2,
- "reckless", 4, 1,
- "smoking", 5, 2
-)
-
-dagify(hospitalized ~ broken_bone + glioma,
- broken_bone ~ reckless,
- smoking ~ reckless,
- labels = c(hospitalized = "Hospitalization",
- broken_bone = "Broken Bone",
- glioma = "Glioma",
- reckless = "Reckless \nBehavior",
- smoking = "Smoking"),
- coords = coords) %>%
- ggdag_dconnected("glioma", "smoking", controlling_for = "hospitalized",
- text = FALSE, use_labels = "label", collider_lines = FALSE)
coords <- tibble::tribble(
+ ~name, ~x, ~y,
+ "glioma", 1, 2,
+ "hospitalized", 2, 3,
+ "broken_bone", 3, 2,
+ "reckless", 4, 1,
+ "smoking", 5, 2
+)
+
+dagify(hospitalized ~ broken_bone + glioma,
+ broken_bone ~ reckless,
+ smoking ~ reckless,
+ labels = c(hospitalized = "Hospitalization",
+ broken_bone = "Broken Bone",
+ glioma = "Glioma",
+ reckless = "Reckless \nBehavior",
+ smoking = "Smoking"),
+ coords = coords) %>%
+ ggdag_dconnected("glioma", "smoking", controlling_for = "hospitalized",
+ text = FALSE, use_labels = "label", collider_lines = FALSE)
Even though smoking doesn’t actually cause glioma, it will appear as if there is an association between the two. In fact, in this case, smoking may even appear to be protective against glioma, since the controls are more likely to be smokers.
Let’s also consider how bias arises in loss-to-follow-up. In a randomized clinical trial or cohort study, the main threat of selection bias is not through who enters the study (although that may affect generalizability) but who leaves it. If loss-to-follow-up is associated with the exposure or outcome, the relationship between the two may be biased. Let’s consider a trial where we are testing a new HIV drug and its effect on CD4 white blood cell count. If the treatment causes symptoms, participants may leave the trial. Similarly, there may be those whose HIV is getting worse and thus more symptomatic, which also may cause people to leave the trial. If we only have information on people who stay in the study, we are stratifying by follow-up status:
-dagify(follow_up ~ symptoms,
- symptoms ~ new_rx + dx_severity,
- cd4 ~ dx_severity,
- labels = c(
- follow_up = "Follow-Up",
- symptoms = "Symptoms",
- new_rx = "New HIV Drug",
- dx_severity = "Underyling \nHIV Severity",
- cd4 = "CD4 Count"
- )) %>%
- ggdag_adjust("follow_up", layout = "mds", text = FALSE,
- use_labels = "label", collider_lines = FALSE)
dagify(follow_up ~ symptoms,
+ symptoms ~ new_rx + dx_severity,
+ cd4 ~ dx_severity,
+ labels = c(
+ follow_up = "Follow-Up",
+ symptoms = "Symptoms",
+ new_rx = "New HIV Drug",
+ dx_severity = "Underyling \nHIV Severity",
+ cd4 = "CD4 Count"
+ )) %>%
+ ggdag_adjust("follow_up", layout = "mds", text = FALSE,
+ use_labels = "label", collider_lines = FALSE)
But follow-up is downstream from a collider, symptoms. Controlling for a variable that is downstream from a collider induces bias and, because we only have information on people who remain in the study, we are inadvertently stratifying on follow-up status (see the vignette introducing DAGs for more on downstream colliders). Thus, the estimated effect of the HIV drug on CD4 count will be biased.
@@ -312,9 +316,8 @@NEWS.md
+ node_equivalent_class()
that didn’t account for the way dagitty returns DAGs with no directionnode_equivalent_class()
that didn’t check to
nodeis_false()
to avoid dependency on R 3.5.0{}
to adjustment set names to reflect conventionNEWS.md
file to track changes to the package.tidy_dagitty
object to data.frame — as.data.frame.tidy_dagitty • ggdagtidy_dagitty
object to tbl — as.tbl.tidy_daggity • ggdagtidy_dagitty
objects — dplyr • ggdagtidy_dagitty
object for ggplot2
— fortify • ggdagtidy_dagitty
— print.tidy_dagitty • ggdagtidy_dagitty
object to tbl_df — tbl_df.tidy_daggity • ggdagdagitty
object — tidy_dagitty • ggdag