From 30b7fadbb89f212bee03611fecdc1b71db0cfe22 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Fri, 1 Mar 2024 15:10:01 +0100 Subject: [PATCH 1/4] Python: Add test --- .../test/experimental/dataflow/fieldflow/test_dict.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/python/ql/test/experimental/dataflow/fieldflow/test_dict.py b/python/ql/test/experimental/dataflow/fieldflow/test_dict.py index c2f74b83c3dd..2a55885e2b9a 100644 --- a/python/ql/test/experimental/dataflow/fieldflow/test_dict.py +++ b/python/ql/test/experimental/dataflow/fieldflow/test_dict.py @@ -45,6 +45,16 @@ def test_dict_update(): SINK(d["key"]) # $ flow="SOURCE, l:-1 -> d['key']" SINK(d.get("key")) # $ flow="SOURCE, l:-2 -> d.get(..)" + +@expects(2) # $ unresolved_call=expects(..) unresolved_call=expects(..)(..) +def test_dict_update_fresh_key(): + # we had a regression where we did not create a dictionary element content + # for keys used in "inline update" like this + d = {} + d["fresh_key"] = SOURCE + SINK(d["fresh_key"]) # $ MISSING: flow="SOURCE, l:-1 -> d['fresh_key']" + + @expects(3) # $ unresolved_call=expects(..) unresolved_call=expects(..)(..) def test_dict_setdefault(): d = {} From eeda4355f1619185f7b900f80c06d0a0ffd41bd3 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Fri, 1 Mar 2024 15:21:13 +0100 Subject: [PATCH 2/4] Python: Fix missing DictionaryElementContent --- .../python/dataflow/new/internal/DataFlowPrivate.qll | 2 ++ .../python/dataflow/new/internal/DataFlowPublic.qll | 12 +++++++++++- .../experimental/dataflow/fieldflow/test_dict.py | 2 +- 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll index c11e0cde64dc..47f41d0cd057 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll @@ -809,6 +809,8 @@ predicate dictStoreStep(CfgNode nodeFrom, DictionaryElementContent c, Node nodeT * TODO: Once TaintTracking no longer uses `dictStoreStep`, unify the two predicates. */ private predicate moreDictStoreSteps(CfgNode nodeFrom, DictionaryElementContent c, Node nodeTo) { + // NOTE: It's important to add logic to the newtype definition of + // DictionaryElementContent if you add new cases here. exists(SubscriptNode subscript | nodeTo.(PostUpdateNode).getPreUpdateNode().asCfgNode() = subscript.getObject() and nodeFrom.asCfgNode() = subscript.(DefinitionNode).getValue() and diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll index 923fc441caf8..73c87992c483 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll @@ -605,9 +605,19 @@ newtype TContent = } or /** An element of a dictionary under a specific key. */ TDictionaryElementContent(string key) { - key = any(KeyValuePair kvp).getKey().(StrConst).getS() + // {"key": ...} + key = any(KeyValuePair kvp).getKey().(StrConst).getText() or + // func(key=...) key = any(Keyword kw).getArg() + or + // d["key"] = ... + key = any(SubscriptNode sub | sub.isStore() | sub.getIndex().getNode().(StrConst).getText()) + or + // d.setdefault("key", ...) + exists(CallNode call | call.getFunction().(AttrNode).getName() = "setdefault" | + key = call.getArg(0).getNode().(StrConst).getText() + ) } or /** An element of a dictionary under any key. */ TDictionaryElementAnyContent() or diff --git a/python/ql/test/experimental/dataflow/fieldflow/test_dict.py b/python/ql/test/experimental/dataflow/fieldflow/test_dict.py index 2a55885e2b9a..e51e0ccbdf30 100644 --- a/python/ql/test/experimental/dataflow/fieldflow/test_dict.py +++ b/python/ql/test/experimental/dataflow/fieldflow/test_dict.py @@ -52,7 +52,7 @@ def test_dict_update_fresh_key(): # for keys used in "inline update" like this d = {} d["fresh_key"] = SOURCE - SINK(d["fresh_key"]) # $ MISSING: flow="SOURCE, l:-1 -> d['fresh_key']" + SINK(d["fresh_key"]) # $ flow="SOURCE, l:-1 -> d['fresh_key']" @expects(3) # $ unresolved_call=expects(..) unresolved_call=expects(..)(..) From d99a763ef79ab5248e01fc369ed4c7a8a3e490a0 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Fri, 1 Mar 2024 15:23:51 +0100 Subject: [PATCH 3/4] Python: add change-note --- python/ql/lib/change-notes/2024-03-01-dict-update-content.md | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 python/ql/lib/change-notes/2024-03-01-dict-update-content.md diff --git a/python/ql/lib/change-notes/2024-03-01-dict-update-content.md b/python/ql/lib/change-notes/2024-03-01-dict-update-content.md new file mode 100644 index 000000000000..dfb8d247fffa --- /dev/null +++ b/python/ql/lib/change-notes/2024-03-01-dict-update-content.md @@ -0,0 +1,4 @@ +--- +category: minorAnalysis +--- +* Fixed missing flow for dictionary updates (`d[] = ...`) when `` is a string constant not used in dictionary literals or as name of keyword-argument. From 16cb6c2044db3076e600f18423238f405d695a38 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Mon, 4 Mar 2024 11:41:47 +0100 Subject: [PATCH 4/4] Python: Fix validTest expectations Co-authored-by: yoff --- python/ql/test/experimental/dataflow/fieldflow/test_dict.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/ql/test/experimental/dataflow/fieldflow/test_dict.py b/python/ql/test/experimental/dataflow/fieldflow/test_dict.py index e51e0ccbdf30..f64a5187925d 100644 --- a/python/ql/test/experimental/dataflow/fieldflow/test_dict.py +++ b/python/ql/test/experimental/dataflow/fieldflow/test_dict.py @@ -46,7 +46,6 @@ def test_dict_update(): SINK(d.get("key")) # $ flow="SOURCE, l:-2 -> d.get(..)" -@expects(2) # $ unresolved_call=expects(..) unresolved_call=expects(..)(..) def test_dict_update_fresh_key(): # we had a regression where we did not create a dictionary element content # for keys used in "inline update" like this