From e824a3196fdca1a76df425bda12ba91997fa1300 Mon Sep 17 00:00:00 2001
From: "Yanan Cao (PyTorch)"
Date: Thu, 19 Dec 2024 15:37:56 -0800
Subject: [PATCH] executorch/exir/program/test (#7397)

Summary:
Pull Request resolved: https://github.com/pytorch/executorch/pull/7397

Reviewed By: avikchaudhuri, ydwu4

Differential Revision: D67383235
---
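Every call site below changes in the same mechanical way: an explicit
`strict=True` is passed to `torch.export.export` (or the `export` alias
imported from `torch.export`). In strict mode the module is traced with
TorchDynamo, which raises on constructs it cannot soundly capture instead
of silently falling back to plain Python semantics; spelling the flag out
pins that behavior even if the library default changes. A minimal sketch
of the call shape, assuming only a stock `torch` install (the `Add`
module here is illustrative, not part of this patch):

    import torch
    from torch.export import export

    class Add(torch.nn.Module):
        def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
            return x + y

    # strict=True selects the TorchDynamo-based tracer; the resulting
    # ExportedProgram is what exir.to_edge() consumes downstream.
    ep = export(Add(), (torch.randn(2, 2), torch.randn(2, 2)), strict=True)
    print(ep.graph_module.code)
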
 .../runtime/test/export_stateful_model.py     |   2 +-
 .../coreml/test/test_coreml_partitioner.py    |   7 +-
 backends/apple/mps/test/test_mps_utils.py     |   5 +-
 backends/cadence/aot/compiler.py              |   2 +-
 backends/example/test_example_delegate.py     |   4 +-
 backends/qualcomm/tests/test_qnn_delegate.py  |  11 +-
 backends/qualcomm/tests/utils.py              |   2 +-
 backends/qualcomm/utils/utils.py              |   6 +-
 backends/vulkan/test/test_vulkan_delegate.py  |   2 +-
 backends/xnnpack/partition/graphs/sdpa.py     |   1 +
 backends/xnnpack/test/tester/tester.py        |   2 +-
 build/packaging/smoke_test.py                 |   2 +-
 .../tests/test_delegation_info.py             |   2 +-
 devtools/bundled_program/util/test_util.py    |   1 +
 devtools/etrecord/tests/etrecord_test.py      |   2 +-
 .../devtools-integration-tutorial.py          |   7 +-
 .../export-to-executorch-tutorial.py          |  28 +-
 examples/apple/coreml/scripts/export.py       |   9 +-
 .../apple/coreml/scripts/inspector_utils.py   |   5 +-
 .../devtools/scripts/gen_sample_etrecord.py   |   5 +-
 examples/llm_manual/export_nanogpt.py         |   2 +-
 .../mediatek/aot_utils/oss_utils/utils.py     |   4 +-
 .../mediatek/model_export_scripts/llama.py    |   2 +-
 .../text_decoder/test/test_text_decoder.py    |   1 +
 .../test/test_vision_encoder.py               |   1 +
 examples/models/llava/export_llava.py         |   7 +-
 .../models/phi-3-mini-lora/export_model.py    |   4 +-
 examples/qualcomm/oss_scripts/llama2/llama.py |   3 +-
 .../qualcomm/oss_scripts/llama3_2/llama.py    |   2 +-
 examples/qualcomm/scripts/export_example.py   |   2 +-
 .../qualcomm/scripts/mobilebert_fine_tune.py  |   2 +-
 examples/qualcomm/utils.py                    |   2 +-
 exir/backend/test/demos/rpc/test_rpc.py       |   8 +-
 .../test/demos/test_delegate_aten_mode.py     |   6 +-
 exir/backend/test/test_backends.py            |   2 +-
 exir/backend/test/test_backends_lifted.py     |  89 +++----
 exir/backend/test/test_compatibility.py       |   2 +-
 exir/backend/test/test_graph_partition.py     |   3 +-
 .../test/test_lowered_backend_module.py       |  16 +-
 exir/backend/test/test_partitioner.py         |  29 +--
 exir/backend/test/test_passes.py              |   5 +-
 exir/backend/test/test_utils.py               |  30 +--
 exir/capture/_capture.py                      |   3 +-
 exir/emit/test/test_emit.py                   | 190 ++++--------
 exir/program/test/test_fake_program.py        |   3 +-
 exir/program/test/test_program.py             |  32 +--
 exir/tests/models.py                          |   5 +-
 exir/tests/test_arg_validator.py              |   4 +-
 exir/tests/test_delegate.py                   |  15 +-
 exir/tests/test_dynamic_shape_propagation.py  |   9 +-
 exir/tests/test_memory_format_ops_pass.py     |   2 +-
 .../test_memory_format_ops_pass_utils.py      |   4 +-
 exir/tests/test_memory_planning.py            |  23 +-
 exir/tests/test_passes.py                     | 243 ++++--------------
 exir/tests/test_print_program.py              |   2 +-
 exir/tests/test_quant_fusion_pass.py          |  24 +-
 exir/tests/test_quantization.py               |   2 +-
 exir/tests/test_remove_view_copy.py           |   6 +-
 exir/tests/test_serde.py                      |  10 +-
 exir/tests/test_tracer.py                     |   5 +-
 exir/tests/test_verification.py               |  34 +--
 exir/verification/test/test_verifier.py       |   6 +-
 extension/android_test/add_model.py           |   2 +-
 extension/llm/modules/test/test_attention.py  |   2 +
 .../modules/test/test_position_embeddings.py  |   7 +-
 extension/pybindings/test/make_test.py        |   6 +-
 .../training/examples/XOR/export_model.py     |   2 +-
 extension/training/pybindings/test/test.py    |   2 +-
 profiler/test/test_profiler_e2e.py            |   4 +-
 test/end2end/exported_module.py               |   1 +
 test/models/export_delegated_program.py       |   6 +-
 .../generate_linear_out_bundled_program.py    |   2 +-
 72 files changed, 396 insertions(+), 587 deletions(-)

diff --git a/backends/apple/coreml/runtime/test/export_stateful_model.py b/backends/apple/coreml/runtime/test/export_stateful_model.py
index 61d1a93980..e477d1425b 100644
--- a/backends/apple/coreml/runtime/test/export_stateful_model.py
+++ b/backends/apple/coreml/runtime/test/export_stateful_model.py
@@ -47,7 +47,7 @@ def main() -> None:
         torch.randn((1, embedding_dim)),
         torch.tensor([0]),
     )
-    exported_model = export(model, example_inputs)
+    exported_model = export(model, example_inputs, strict=True)
     edge_program_manager = exir.to_edge(exported_model)
     compile_specs = CoreMLBackend.generate_compile_specs(
         compute_precision=ct.precision.FLOAT16,
diff --git a/backends/apple/coreml/test/test_coreml_partitioner.py b/backends/apple/coreml/test/test_coreml_partitioner.py
index 64e1570f0b..2c3b9feb5c 100644
--- a/backends/apple/coreml/test/test_coreml_partitioner.py
+++ b/backends/apple/coreml/test/test_coreml_partitioner.py
@@ -16,7 +16,6 @@


 class TestCoreMLPartitioner(unittest.TestCase):
-
     # TODO(T182928844): Delegate dim order op to backend.
     edge_compile_config = executorch.exir.EdgeCompileConfig(_skip_dim_order=True)

@@ -34,7 +33,7 @@ def forward(self, a, x, b):
         model.eval()

         example_inputs = (torch.randn(2, 2), torch.randn(2, 2), torch.randn(2, 2))
-        exir_program_aten = torch.export.export(model, example_inputs)
+        exir_program_aten = torch.export.export(model, example_inputs, strict=True)

         edge_program_manager = executorch.exir.to_edge(
             exir_program_aten, compile_config=self.edge_compile_config
@@ -61,7 +60,7 @@ def test_vit_skip_conv(self):
         model.eval()

         example_inputs = (torch.randn(1, 3, 224, 224),)
-        exir_program_aten = torch.export.export(model, example_inputs)
+        exir_program_aten = torch.export.export(model, example_inputs, strict=True)
         edge_program_manager = executorch.exir.to_edge(
             exir_program_aten, compile_config=self.edge_compile_config
         )
@@ -106,7 +105,7 @@ def forward(self, q, k_val, input_pos):
         k_val = torch.randn((1, embedding_dim))
         input_pos = torch.tensor([0])
         example_inputs = (q, k_val, input_pos)
-        exir_program_aten = torch.export.export(model, example_inputs)
+        exir_program_aten = torch.export.export(model, example_inputs, strict=True)

         compile_specs = CoreMLBackend.generate_compile_specs(
             minimum_deployment_target=ct.target.iOS18
diff --git a/backends/apple/mps/test/test_mps_utils.py b/backends/apple/mps/test/test_mps_utils.py
index 39ce5df511..43ae9aa0f0 100644
--- a/backends/apple/mps/test/test_mps_utils.py
+++ b/backends/apple/mps/test/test_mps_utils.py
@@ -247,10 +247,7 @@ def lower_module_and_test_output(
         )

         executorch_program = to_edge(
-            export(
-                delegated_program,
-                sample_inputs,
-            ),
+            export(delegated_program, sample_inputs, strict=True),
             compile_config=exir.EdgeCompileConfig(
                 _check_ir_validity=False,
                 _skip_dim_order=True,  # TODO(T182928844): Delegate dim order op to backend.
diff --git a/backends/cadence/aot/compiler.py b/backends/cadence/aot/compiler.py
index 6b3a023181..df1f42601b 100644
--- a/backends/cadence/aot/compiler.py
+++ b/backends/cadence/aot/compiler.py
@@ -176,7 +176,7 @@ def export_program(
         torch._C._set_mkldnn_enabled(False)

     # else: capture the model and return it.
-    expo_program = export(model, inputs)
+    expo_program = export(model, inputs, strict=True)

     if dump_graphs:
         logging.info("Exported graph:")
diff --git a/backends/example/test_example_delegate.py b/backends/example/test_example_delegate.py
index d830c1bb31..9e2b4e458c 100644
--- a/backends/example/test_example_delegate.py
+++ b/backends/example/test_example_delegate.py
@@ -60,7 +60,7 @@ def get_example_inputs():
         quantized_gm = m

         exported_program = to_edge(
-            export(quantized_gm, copy.deepcopy(example_inputs)),
+            export(quantized_gm, copy.deepcopy(example_inputs), strict=True),
             compile_config=EDGE_COMPILE_CONFIG,
         )

@@ -92,7 +92,7 @@ def test_delegate_mobilenet_v2(self):
         quantized_gm = m

         exported_program = to_edge(
-            export(quantized_gm, copy.deepcopy(example_inputs)),
+            export(quantized_gm, copy.deepcopy(example_inputs), strict=True),
             compile_config=EDGE_COMPILE_CONFIG,
         )
diff --git a/backends/qualcomm/tests/test_qnn_delegate.py b/backends/qualcomm/tests/test_qnn_delegate.py
index 37ff54a82d..f9550d6483 100644
--- a/backends/qualcomm/tests/test_qnn_delegate.py
+++ b/backends/qualcomm/tests/test_qnn_delegate.py
@@ -1617,7 +1617,7 @@ def test_qnn_backend_multi_contexts_composite(self):
         )
         sample_input = module.get_random_input()
         edge_prog = to_edge(
-            torch.export.export(module, sample_input),
+            torch.export.export(module, sample_input, strict=True),
         )
         update_spill_fill_size(edge_prog.exported_program())
         exec_prog = edge_prog.to_executorch()
@@ -1957,7 +1957,7 @@ def calibrator(gm):
         self.assertEqual(len(exported_progs), 1)
         # lower all graph again, the skipped operators will be left in CPU
         exec_prog = to_edge(
-            torch.export.export(graph_module, sample_input),
+            torch.export.export(graph_module, sample_input, strict=True),
         ).to_executorch()
         self.verify_output(module, sample_input, exec_prog)

@@ -2004,7 +2004,7 @@ def calibrator(gm):
         self.assertEqual(len(exported_progs), 2)
         # lower all graph again, the skipped operators will be left in CPU
         exec_prog = exec_prog = to_edge(
-            torch.export.export(graph_module, sample_input),
+            torch.export.export(graph_module, sample_input, strict=True),
         ).to_executorch()
         self.verify_output(module, sample_input, exec_prog)

@@ -2041,7 +2041,7 @@ def calibrator(gm):
         self.assertEqual(len(exported_progs), 5)
         # lower all graph again, the skipped operators will be delegated with fp16
         exec_prog = to_edge(
-            torch.export.export(graph_module, sample_input),
+            torch.export.export(graph_module, sample_input, strict=True),
         ).to_executorch()
         self.verify_output(module, sample_input, exec_prog)

@@ -2086,7 +2086,7 @@ def test_qnn_backend_multi_contexts_composite(self):
         )
         sample_input = module.get_random_input()
         edge_prog = to_edge(
-            torch.export.export(module, sample_input),
+            torch.export.export(module, sample_input, strict=True),
         )
         update_spill_fill_size(edge_prog.exported_program())
         exec_prog = edge_prog.to_executorch()
@@ -2721,7 +2721,6 @@ def test_ssd300_vgg16(self):


 class TestExampleQaihubScript(TestQNN):
-
     def required_envs(self, conditions=None) -> bool:
         conditions = [] if conditions is None else conditions
         return all(
diff --git a/backends/qualcomm/tests/utils.py b/backends/qualcomm/tests/utils.py
index 96591eb890..9d78683eb9 100644
--- a/backends/qualcomm/tests/utils.py
+++ b/backends/qualcomm/tests/utils.py
@@ -385,7 +385,7 @@ def get_qdq_module(
         custom_quant_annotations: Tuple[Callable] = (),
         quant_dtype: QuantDtype = QuantDtype.use_8a8w,
     ) -> torch.fx.GraphModule:
-        m = torch.export.export(module, inputs).module()
+        m = torch.export.export(module, inputs, strict=True).module()

         quantizer = QnnQuantizer()
         quantizer.add_custom_quant_annotations(custom_quant_annotations)
diff --git a/backends/qualcomm/utils/utils.py b/backends/qualcomm/utils/utils.py
index 33be00ed51..a73fe6944e 100644
--- a/backends/qualcomm/utils/utils.py
+++ b/backends/qualcomm/utils/utils.py
@@ -337,7 +337,7 @@ def capture_program(
     inputs: Tuple[torch.Tensor],
     custom_pass_config: FrozenSet[str] = frozenset(),
 ) -> exir.ExirExportedProgram:
-    ep = torch.export.export(module, inputs)
+    ep = torch.export.export(module, inputs, strict=True)
     decomposed_ep = ep.run_decompositions(get_decomp_table())
     # We choose call_operator by target in ConvertBinaryOpsWithScalar
     # because it is the same source_fn_stack for MultiheadAttention
@@ -551,7 +551,7 @@ def prepare_subgm(subgm, subgm_name):
     fp_node_id_set = fp_node_id_set if fp_node_id_set is not None else set()
     fp_node_op_set = fp_node_op_set if fp_node_op_set is not None else set()

-    graph_module = torch.export.export(nn_module, sample_input).module()
+    graph_module = torch.export.export(nn_module, sample_input, strict=True).module()
     # define node support type
     capability_partitioner = CapabilityBasedPartitioner(
         graph_module,
@@ -664,7 +664,7 @@ def forward(self, *inputs):
             ).default(inputs)

     model = Model()
-    prog = torch.export.export(model, tuple(inputs.values()))
+    prog = torch.export.export(model, tuple(inputs.values()), strict=True)
     # bookkeeping for variables' life cycle
     return {
         "custom_op": custom_op,
diff --git a/backends/vulkan/test/test_vulkan_delegate.py b/backends/vulkan/test/test_vulkan_delegate.py
index 89bdb073a9..85326d1e89 100644
--- a/backends/vulkan/test/test_vulkan_delegate.py
+++ b/backends/vulkan/test/test_vulkan_delegate.py
@@ -112,7 +112,7 @@ def run_test():
                 model(*sample_inputs)

             program: ExportedProgram = export(
-                model, sample_inputs, dynamic_shapes=dynamic_shapes
+                model, sample_inputs, dynamic_shapes=dynamic_shapes, strict=True
             )

             edge_program = to_edge_transform_and_lower(
diff --git a/backends/xnnpack/partition/graphs/sdpa.py b/backends/xnnpack/partition/graphs/sdpa.py
index 4f4afa92e2..24fe35ea56 100644
--- a/backends/xnnpack/partition/graphs/sdpa.py
+++ b/backends/xnnpack/partition/graphs/sdpa.py
@@ -76,6 +76,7 @@ def forward(
                         v,
                         mask,
                     ),
+                    strict=True,
                 ),
                 compile_config=get_xnnpack_edge_compile_config(),
             )
diff --git a/backends/xnnpack/test/tester/tester.py b/backends/xnnpack/test/tester/tester.py
index c561f9f661..1b6f03512b 100644
--- a/backends/xnnpack/test/tester/tester.py
+++ b/backends/xnnpack/test/tester/tester.py
@@ -194,7 +194,7 @@ def run(
         inputs: Tuple[torch.Tensor],
     ) -> None:
         self.exported_program = export(
-            artifact, inputs, dynamic_shapes=self.dynamic_shapes
+            artifact, inputs, dynamic_shapes=self.dynamic_shapes, strict=True
         )

     @property
diff --git a/build/packaging/smoke_test.py b/build/packaging/smoke_test.py
index 1573e37bf5..8f2bd08004 100644
--- a/build/packaging/smoke_test.py
+++ b/build/packaging/smoke_test.py
@@ -65,7 +65,7 @@ def export_linear_model() -> bytes:

     # Export the pytorch model and process for ExecuTorch.
     print("Exporting program...")
-    exported_program = export(LinearModel(), example_inputs)
+    exported_program = export(LinearModel(), example_inputs, strict=True)
     print("Lowering to edge...")
     edge_program = to_edge(exported_program)
     print("Creating ExecuTorch program...")
diff --git a/devtools/backend_debug/tests/test_delegation_info.py b/devtools/backend_debug/tests/test_delegation_info.py
index 6ff5169094..980ef8d241 100644
--- a/devtools/backend_debug/tests/test_delegation_info.py
+++ b/devtools/backend_debug/tests/test_delegation_info.py
@@ -31,7 +31,7 @@ def forward(self, a, x, b):
         m = Model()
         inputs = (torch.randn(2, 2), torch.randn(2, 2), torch.randn(2, 2))

-        edge = to_edge(torch.export.export(m, inputs)).to_backend(
+        edge = to_edge(torch.export.export(m, inputs, strict=True)).to_backend(
             AddMulPartitionerDemo()
         )
         delegation_info = get_delegation_info(edge.exported_program().graph_module)
diff --git a/devtools/bundled_program/util/test_util.py b/devtools/bundled_program/util/test_util.py
index 505186f3a0..62776852db 100644
--- a/devtools/bundled_program/util/test_util.py
+++ b/devtools/bundled_program/util/test_util.py
@@ -271,6 +271,7 @@ def get_common_executorch_program() -> (
         m_name: export(
             StatefulWrapperModule(eager_model, getattr(eager_model, m_name)),
             capture_inputs[m_name],
+            strict=True,
         )
         for m_name in eager_model.method_names
     }
diff --git a/devtools/etrecord/tests/etrecord_test.py b/devtools/etrecord/tests/etrecord_test.py
index daef7c3e1e..cf50662c2a 100644
--- a/devtools/etrecord/tests/etrecord_test.py
+++ b/devtools/etrecord/tests/etrecord_test.py
@@ -69,7 +69,7 @@ def get_test_model_with_bundled_program(self):

     def get_test_model_with_manager(self):
         f = models.BasicSinMax()
-        aten_dialect = export(f, f.get_random_inputs())
+        aten_dialect = export(f, f.get_random_inputs(), strict=True)
         edge_program: EdgeProgramManager = to_edge(
             aten_dialect, compile_config=EdgeCompileConfig(_check_ir_validity=False)
         )
diff --git a/docs/source/tutorials_source/devtools-integration-tutorial.py b/docs/source/tutorials_source/devtools-integration-tutorial.py
index dece18fa8c..b9028dc91f 100644
--- a/docs/source/tutorials_source/devtools-integration-tutorial.py
+++ b/docs/source/tutorials_source/devtools-integration-tutorial.py
@@ -89,10 +89,7 @@ def forward(self, x):


 model = Net()

-aten_model: ExportedProgram = export(
-    model,
-    (torch.randn(1, 1, 32, 32),),
-)
+aten_model: ExportedProgram = export(model, (torch.randn(1, 1, 32, 32),), strict=True)

 edge_program_manager: EdgeProgramManager = to_edge(
     aten_model, compile_config=EdgeCompileConfig(_check_ir_validity=True)
@@ -141,7 +138,7 @@ def forward(self, x):

 # Step 1: ExecuTorch Program Export
 m_name = "forward"
-method_graphs = {m_name: export(model, (torch.randn(1, 1, 32, 32),))}
+method_graphs = {m_name: export(model, (torch.randn(1, 1, 32, 32),), strict=True)}

 # Step 2: Construct Method Test Suites
 inputs = [[torch.randn(1, 1, 32, 32)] for _ in range(2)]
diff --git a/docs/source/tutorials_source/export-to-executorch-tutorial.py b/docs/source/tutorials_source/export-to-executorch-tutorial.py
index fac3eab08e..87ae6d8ca6 100644
--- a/docs/source/tutorials_source/export-to-executorch-tutorial.py
+++ b/docs/source/tutorials_source/export-to-executorch-tutorial.py
@@ -66,7 +66,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:


 example_args = (torch.randn(1, 3, 256, 256),)
-aten_dialect: ExportedProgram = export(SimpleConv(), example_args)
+aten_dialect: ExportedProgram = export(SimpleConv(), example_args, strict=True)
 print(aten_dialect)

 ######################################################################
@@ -101,7 +101,7 @@ def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:


 example_args = (torch.randn(3, 3), torch.randn(3, 3))
-aten_dialect: ExportedProgram = export(Basic(), example_args)
+aten_dialect: ExportedProgram = export(Basic(), example_args, strict=True)

 # Works correctly
 print(aten_dialect.module()(torch.ones(3, 3), torch.ones(3, 3)))
@@ -131,7 +131,7 @@ def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
 dim1_x = Dim("dim1_x", min=1, max=10)
 dynamic_shapes = {"x": {1: dim1_x}, "y": {1: dim1_x}}
 aten_dialect: ExportedProgram = export(
-    Basic(), example_args, dynamic_shapes=dynamic_shapes
+    Basic(), example_args, dynamic_shapes=dynamic_shapes, strict=True
 )
 print(aten_dialect)

@@ -213,7 +213,7 @@ def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
 print("Quantized Graph")
 print(converted_graph)

-aten_dialect: ExportedProgram = export(converted_graph, example_args)
+aten_dialect: ExportedProgram = export(converted_graph, example_args, strict=True)
 print("ATen Dialect Graph")
 print(aten_dialect)

@@ -243,7 +243,7 @@ def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
 from executorch.exir import EdgeProgramManager, to_edge

 example_args = (torch.randn(1, 3, 256, 256),)
-aten_dialect: ExportedProgram = export(SimpleConv(), example_args)
+aten_dialect: ExportedProgram = export(SimpleConv(), example_args, strict=True)

 edge_program: EdgeProgramManager = to_edge(aten_dialect)
 print("Edge Dialect Graph")
@@ -267,10 +267,10 @@ def forward(self, x):


 encode_args = (torch.randn(1, 10),)
-aten_encode: ExportedProgram = export(Encode(), encode_args)
+aten_encode: ExportedProgram = export(Encode(), encode_args, strict=True)

 decode_args = (torch.randn(1, 5),)
-aten_decode: ExportedProgram = export(Decode(), decode_args)
+aten_decode: ExportedProgram = export(Decode(), decode_args, strict=True)

 edge_program: EdgeProgramManager = to_edge(
     {"encode": aten_encode, "decode": aten_decode}
@@ -291,7 +291,7 @@ def forward(self, x):
 # rather than the ``torch.ops.aten`` namespace.

 example_args = (torch.randn(1, 3, 256, 256),)
-aten_dialect: ExportedProgram = export(SimpleConv(), example_args)
+aten_dialect: ExportedProgram = export(SimpleConv(), example_args, strict=True)
 edge_program: EdgeProgramManager = to_edge(aten_dialect)
 print("Edge Dialect Graph")
 print(edge_program.exported_program())
@@ -357,7 +357,7 @@ def forward(self, x):

 # Export and lower the module to Edge Dialect
 example_args = (torch.ones(1),)
-aten_dialect: ExportedProgram = export(LowerableModule(), example_args)
+aten_dialect: ExportedProgram = export(LowerableModule(), example_args, strict=True)
 edge_program: EdgeProgramManager = to_edge(aten_dialect)
 to_be_lowered_module = edge_program.exported_program()

@@ -423,7 +423,7 @@ def forward(self, x):


 example_args = (torch.ones(1),)
-aten_dialect: ExportedProgram = export(ComposedModule(), example_args)
+aten_dialect: ExportedProgram = export(ComposedModule(), example_args, strict=True)
 edge_program: EdgeProgramManager = to_edge(aten_dialect)
 exported_program = edge_program.exported_program()
 print("Edge Dialect graph")
@@ -461,7 +461,7 @@ def forward(self, a, x, b):


 example_args = (torch.randn(2, 2), torch.randn(2, 2), torch.randn(2, 2))
-aten_dialect: ExportedProgram = export(Foo(), example_args)
+aten_dialect: ExportedProgram = export(Foo(), example_args, strict=True)
 edge_program: EdgeProgramManager = to_edge(aten_dialect)
 exported_program = edge_program.exported_program()
 print("Edge Dialect graph")
@@ -495,7 +495,7 @@ def forward(self, a, x, b):


 example_args = (torch.randn(2, 2), torch.randn(2, 2), torch.randn(2, 2))
-aten_dialect: ExportedProgram = export(Foo(), example_args)
+aten_dialect: ExportedProgram = export(Foo(), example_args, strict=True)
 edge_program: EdgeProgramManager = to_edge(aten_dialect)
 exported_program = edge_program.exported_program()
 delegated_program = edge_program.to_backend(AddMulPartitionerDemo())
@@ -577,7 +577,9 @@ def forward(self, x):
 pre_autograd_aten_dialect = export_for_training(M(), example_args).module()
 # Optionally do quantization:
 # pre_autograd_aten_dialect = convert_pt2e(prepare_pt2e(pre_autograd_aten_dialect, CustomBackendQuantizer))
-aten_dialect: ExportedProgram = export(pre_autograd_aten_dialect, example_args)
+aten_dialect: ExportedProgram = export(
+    pre_autograd_aten_dialect, example_args, strict=True
+)
 edge_program: exir.EdgeProgramManager = exir.to_edge(aten_dialect)
 # Optionally do delegation:
 # edge_program = edge_program.to_backend(CustomBackendPartitioner)
diff --git a/examples/apple/coreml/scripts/export.py b/examples/apple/coreml/scripts/export.py
index 53316ea200..a4ceaee05d 100644
--- a/examples/apple/coreml/scripts/export.py
+++ b/examples/apple/coreml/scripts/export.py
@@ -88,7 +88,9 @@ def partition_module_to_coreml(module):

 def lower_module_to_coreml(module, compile_specs, example_inputs):
     module = module.eval()
-    edge = to_edge(export(module, example_inputs), compile_config=_EDGE_COMPILE_CONFIG)
+    edge = to_edge(
+        export(module, example_inputs, strict=True), compile_config=_EDGE_COMPILE_CONFIG
+    )
     # All of the subsequent calls on the edge_dialect_graph generated above (such as delegation or
     # to_executorch()) are done in place and the graph is also modified in place. For debugging purposes
     # we would like to keep a copy of the original edge dialect graph and hence we create a deepcopy of
@@ -107,7 +109,8 @@ def lower_module_to_coreml(module, compile_specs, example_inputs):
 def export_lowered_module_to_executorch_program(lowered_module, example_inputs):
     lowered_module(*example_inputs)
     exec_prog = to_edge(
-        export(lowered_module, example_inputs), compile_config=_EDGE_COMPILE_CONFIG
+        export(lowered_module, example_inputs, strict=True),
+        compile_config=_EDGE_COMPILE_CONFIG,
     ).to_executorch(config=exir.ExecutorchBackendConfig(extract_delegate_segments=True))

     return exec_prog
@@ -170,7 +173,7 @@ def main():

     if args.use_partitioner:
         model.eval()
-        exir_program_aten = torch.export.export(model, example_inputs)
+        exir_program_aten = torch.export.export(model, example_inputs, strict=True)

         edge_program_manager = exir.to_edge(exir_program_aten)
         edge_copy = copy.deepcopy(edge_program_manager)
diff --git a/examples/apple/coreml/scripts/inspector_utils.py b/examples/apple/coreml/scripts/inspector_utils.py
index 08af6fb348..be614f6db1 100644
--- a/examples/apple/coreml/scripts/inspector_utils.py
+++ b/examples/apple/coreml/scripts/inspector_utils.py
@@ -87,10 +87,7 @@ def to_core_aten(
     module: torch.nn.Module,
     example_inputs: Tuple[Value, ...],
 ) -> ExportedProgram:
-    core_aten_program = export(
-        mod=module,
-        args=example_inputs,
-    )
+    core_aten_program = export(mod=module, args=example_inputs, strict=True)
     return core_aten_program
diff --git a/examples/devtools/scripts/gen_sample_etrecord.py b/examples/devtools/scripts/gen_sample_etrecord.py
index 55544395b5..a6b3d48725 100644
--- a/examples/devtools/scripts/gen_sample_etrecord.py
+++ b/examples/devtools/scripts/gen_sample_etrecord.py
@@ -31,10 +31,7 @@ def gen_etrecord(model: torch.nn.Module, inputs: Any, output_path=None):
     f = model
-    aten_dialect: ExportedProgram = export(
-        f,
-        inputs,
-    )
+    aten_dialect: ExportedProgram = export(f, inputs, strict=True)
     edge_program: EdgeProgramManager = to_edge(
         aten_dialect, compile_config=EdgeCompileConfig(_check_ir_validity=True)
     )
diff --git a/examples/llm_manual/export_nanogpt.py b/examples/llm_manual/export_nanogpt.py
index 2d69c50ec9..9de2e831e2 100644
--- a/examples/llm_manual/export_nanogpt.py
+++ b/examples/llm_manual/export_nanogpt.py
@@ -30,7 +30,7 @@
 m = export_for_training(
     model, example_inputs, dynamic_shapes=dynamic_shape
 ).module()
-traced_model = export(m, example_inputs, dynamic_shapes=dynamic_shape)
+traced_model = export(m, example_inputs, dynamic_shapes=dynamic_shape, strict=True)

 # Convert the model into a runnable ExecuTorch program.
 # To be further lowered to Xnnpack backend, `traced_model` needs xnnpack-specific edge compile config
diff --git a/examples/mediatek/aot_utils/oss_utils/utils.py b/examples/mediatek/aot_utils/oss_utils/utils.py
index cb55822b9d..2246b8eeb1 100755
--- a/examples/mediatek/aot_utils/oss_utils/utils.py
+++ b/examples/mediatek/aot_utils/oss_utils/utils.py
@@ -37,9 +37,9 @@ def build_executorch_binary(
         for data in dataset:
             annotated_model(*data)
         quantized_model = convert_pt2e(annotated_model, fold_quantize=False)
-        aten_dialect = torch.export.export(quantized_model, inputs)
+        aten_dialect = torch.export.export(quantized_model, inputs, strict=True)
     else:
-        aten_dialect = torch.export.export(model, inputs)
+        aten_dialect = torch.export.export(model, inputs, strict=True)

     from executorch.exir.program._program import to_edge_transform_and_lower
diff --git a/examples/mediatek/model_export_scripts/llama.py b/examples/mediatek/model_export_scripts/llama.py
index 77c91bc635..5da1772707 100644
--- a/examples/mediatek/model_export_scripts/llama.py
+++ b/examples/mediatek/model_export_scripts/llama.py
@@ -338,7 +338,7 @@ def export_to_et_ir(
     print(f"Exporting Shape {shape} to:\n{dest_path}")
     example_inputs = model.get_example_inputs(*ntok_and_cache)
     aten_dialect: exir.ExportedProgram = torch.export.export(
-        converted_graph, example_inputs
+        converted_graph, example_inputs, strict=True
     )

     print("Lowering to Edge Dialect Graph")
diff --git a/examples/models/llama3_2_vision/text_decoder/test/test_text_decoder.py b/examples/models/llama3_2_vision/text_decoder/test/test_text_decoder.py
index 8e678801b8..3da00cd70c 100644
--- a/examples/models/llama3_2_vision/text_decoder/test/test_text_decoder.py
+++ b/examples/models/llama3_2_vision/text_decoder/test/test_text_decoder.py
@@ -70,6 +70,7 @@ def test_llama3_2_text_decoder_aoti(self) -> None:
             model.get_example_inputs(),
             kwargs=model.get_example_kwarg_inputs(),
             dynamic_shapes=model.get_dynamic_shapes(),
+            strict=True,
         )
         with tempfile.TemporaryDirectory() as tmpdir:
             path = torch._inductor.aoti_compile_and_package(
diff --git a/examples/models/llama3_2_vision/vision_encoder/test/test_vision_encoder.py b/examples/models/llama3_2_vision/vision_encoder/test/test_vision_encoder.py
index c2f1e77cee..2edeb16ab7 100644
--- a/examples/models/llama3_2_vision/vision_encoder/test/test_vision_encoder.py
+++ b/examples/models/llama3_2_vision/vision_encoder/test/test_vision_encoder.py
@@ -32,6 +32,7 @@ def test_flamingo_vision_encoder(self) -> None:
             encoder,
             model.get_example_inputs(),
             dynamic_shapes=model.get_dynamic_shapes(),
+            strict=True,
         )
         with tempfile.TemporaryDirectory() as tmpdir:
             path = torch._inductor.aoti_compile_and_package(
diff --git a/examples/models/llava/export_llava.py b/examples/models/llava/export_llava.py
index bdb30db735..dabb07e61c 100644
--- a/examples/models/llava/export_llava.py
+++ b/examples/models/llava/export_llava.py
@@ -116,6 +116,7 @@ def forward(self, input_pos, embeddings):
             manager.pre_autograd_graph_module,
             manager.example_inputs,
             dynamic_shapes=manager._get_dynamic_shape(),
+            strict=True,
         )
         return text_model_ep

@@ -158,6 +159,7 @@ def forward(self, images):
         manager.pre_autograd_graph_module,
         manager.example_inputs,
         dynamic_shapes=manager.dynamic_shapes,
+        strict=True,
     )
     return image_encoder_ep

@@ -176,7 +178,10 @@ def quant_embedding(model):
     dynamic_shapes = [{1: token_dim_1}]
     with torch.no_grad():
         token_embedding_ep = torch.export.export(
-            quantized_token_embed.embed_tokens, (prompt,), dynamic_shapes=dynamic_shapes
+            quantized_token_embed.embed_tokens,
+            (prompt,),
+            dynamic_shapes=dynamic_shapes,
+            strict=True,
         )
     return token_embedding_ep
diff --git a/examples/models/phi-3-mini-lora/export_model.py b/examples/models/phi-3-mini-lora/export_model.py
index e6f291bd58..aa7994cf4d 100644
--- a/examples/models/phi-3-mini-lora/export_model.py
+++ b/examples/models/phi-3-mini-lora/export_model.py
@@ -55,7 +55,7 @@ def export_phi3_mini_lora(model) -> None:
     tokens = randint(0, vocab_size, (batch_size, seq_len), dtype=long)
     example_args = (tokens,)
     with sdpa_kernel([SDPBackend.MATH]):
-        aten_dialect: ExportedProgram = export(model, example_args)
+        aten_dialect: ExportedProgram = export(model, example_args, strict=True)

         # 2. to_edge: Make optimizations for Edge devices.
         print("Lowering to edge dialect")
@@ -93,7 +93,7 @@ def export_phi3_mini_lora_training(model) -> None:
     labels = tokens
     example_args = (tokens, labels)
     with sdpa_kernel([SDPBackend.MATH]):
-        exported_graph: ExportedProgram = export(model, example_args)
+        exported_graph: ExportedProgram = export(model, example_args, strict=True)
         print("Creating a joint forward-backwards graph for training")
         joint_graph = _export_forward_backward(exported_graph)
diff --git a/examples/qualcomm/oss_scripts/llama2/llama.py b/examples/qualcomm/oss_scripts/llama2/llama.py
index 323874a3fa..55f84bbcab 100755
--- a/examples/qualcomm/oss_scripts/llama2/llama.py
+++ b/examples/qualcomm/oss_scripts/llama2/llama.py
@@ -108,7 +108,6 @@ def annotate_cat(node: Node, quantization_config: QuantizationConfig):
 def annotate_single_in_single_out(
     node: Node, quantization_config: QuantizationConfig
 ) -> None:
-
     input_qspec_map = {}
     input_act = node.args[0]
     input_qspec_map[input_act] = quantization_config.input_activation
@@ -356,7 +355,7 @@ def quantize(self, quant_dtype, custom_annotations=()):

         with torch.no_grad():
             fx_graph_module = torch.export.export(
-                self.llama_model, self.inputs
+                self.llama_model, self.inputs, strict=True
             ).module()
             fx_graph_module = prepare_pt2e(fx_graph_module, quantizer)
         print("Quantizing the model...")
diff --git a/examples/qualcomm/oss_scripts/llama3_2/llama.py b/examples/qualcomm/oss_scripts/llama3_2/llama.py
index bb6c65aea2..72d4a905c0 100755
--- a/examples/qualcomm/oss_scripts/llama3_2/llama.py
+++ b/examples/qualcomm/oss_scripts/llama3_2/llama.py
@@ -236,7 +236,7 @@ def quantize(self, quant_dtype, args, custom_annotations=()):

         with torch.no_grad():
             fx_graph_module = torch.export.export(
-                self.llama_model, self.inputs
+                self.llama_model, self.inputs, strict=True
             ).module()
             fx_graph_module = prepare_pt2e(fx_graph_module, quantizer)
         logging.info("Quantizing the model...")
diff --git a/examples/qualcomm/scripts/export_example.py b/examples/qualcomm/scripts/export_example.py
index 7445ba4a5e..23f1f59a7d 100644
--- a/examples/qualcomm/scripts/export_example.py
+++ b/examples/qualcomm/scripts/export_example.py
@@ -61,7 +61,7 @@ def main() -> None:
     quantizer = QnnQuantizer()

     # Typical pytorch 2.0 quantization flow
-    m = torch.export.export(model.eval(), example_inputs).module()
+    m = torch.export.export(model.eval(), example_inputs, strict=True).module()
     m = prepare_pt2e(m, quantizer)
     # Calibration
     m(*example_inputs)
diff --git a/examples/qualcomm/scripts/mobilebert_fine_tune.py b/examples/qualcomm/scripts/mobilebert_fine_tune.py
index 8051d15716..4ecdaf3583 100755
--- a/examples/qualcomm/scripts/mobilebert_fine_tune.py
+++ b/examples/qualcomm/scripts/mobilebert_fine_tune.py
@@ -292,7 +292,7 @@ def calibrator(gm):
     )
     # lower all graph again, the skipped operators will be left in CPU
     exec_prog = to_edge(
-        torch.export.export(graph_module, inputs[0]),
+        torch.export.export(graph_module, inputs[0], strict=True),
     ).to_executorch()

     with open(f"{args.artifact}/{pte_filename}.pte", "wb") as file:
diff --git a/examples/qualcomm/utils.py b/examples/qualcomm/utils.py
index bebe99c1d7..c2d2f002aa 100755
--- a/examples/qualcomm/utils.py
+++ b/examples/qualcomm/utils.py
@@ -281,7 +281,7 @@ def build_executorch_binary(
         None: The function writes the output to a specified .pte file.
     """
     if quant_dtype is not None:
-        captured_model = torch.export.export(model, inputs).module()
+        captured_model = torch.export.export(model, inputs, strict=True).module()
         if qat_training_data:
             quantizer = custom_quantizer or make_quantizer(
                 quant_dtype=quant_dtype, is_qat=True
diff --git a/exir/backend/test/demos/rpc/test_rpc.py b/exir/backend/test/demos/rpc/test_rpc.py
index 63feb954fe..d53f62cb33 100644
--- a/exir/backend/test/demos/rpc/test_rpc.py
+++ b/exir/backend/test/demos/rpc/test_rpc.py
@@ -104,7 +104,7 @@ def test_delegate_whole_program(self):
         simple_net = self.get_a_simple_net()
         simple_net_input = simple_net.get_example_inputs()
         exported_program = to_edge(
-            export(simple_net, simple_net_input),
+            export(simple_net, simple_net_input, strict=True),
             compile_config=exir.EdgeCompileConfig(
                 _check_ir_validity=False,
             ),
@@ -124,7 +124,9 @@ def forward(self, *args):

         composite_model = CompositeModule()

-        exec_prog = to_edge(export(composite_model, simple_net_input)).to_executorch()
+        exec_prog = to_edge(
+            export(composite_model, simple_net_input, strict=True)
+        ).to_executorch()

         executorch_module = _load_for_executorch_from_buffer(exec_prog.buffer)

@@ -159,7 +161,7 @@ def forward(self, a, x, b):
         model = Model()
         inputs = (torch.ones(2, 2), torch.ones(2, 2), torch.ones(2, 2))

-        exported_program = to_edge(export(model, inputs))
+        exported_program = to_edge(export(model, inputs, strict=True))

         # First lower to demo backend
         demo_backend_lowered = exported_program.to_backend(AddMulPartitionerDemo())
diff --git a/exir/backend/test/demos/test_delegate_aten_mode.py b/exir/backend/test/demos/test_delegate_aten_mode.py
index 920cc08434..59b6e0b32f 100644
--- a/exir/backend/test/demos/test_delegate_aten_mode.py
+++ b/exir/backend/test/demos/test_delegate_aten_mode.py
@@ -35,7 +35,7 @@ def forward(self, a, x, b):
         add_mul_module = AddMulModule()
         model_inputs = (torch.ones(2, 2), 2 * torch.ones(2, 2), 3 * torch.ones(2, 2))

-        edge_graph_module = to_edge(export(add_mul_module, model_inputs))
+        edge_graph_module = to_edge(export(add_mul_module, model_inputs, strict=True))
         max_value = model_inputs[0].shape[0]
         compile_specs = [CompileSpec("max_value", bytes([max_value]))]
         lowered_add_mul = to_backend(
@@ -56,7 +56,9 @@ def forward(self, a, x, b):

         composite_model(*model_inputs)

-        exec_prog = to_edge(export(composite_model, model_inputs)).to_executorch()
+        exec_prog = to_edge(
+            export(composite_model, model_inputs, strict=True)
+        ).to_executorch()

         buff = exec_prog.buffer
diff --git a/exir/backend/test/test_backends.py b/exir/backend/test/test_backends.py
index df2b25d055..d2bcfa3167 100644
--- a/exir/backend/test/test_backends.py
+++ b/exir/backend/test/test_backends.py
@@ -1251,7 +1251,7 @@ def forward(self, x: Dict[str, torch.Tensor]):
                 return y

         inputs = ({"a": torch.randn(2, 2), "b": torch.randn(2, 2)},)
-        edge_prog = exir.to_edge(torch.export.export(M(), inputs))
+        edge_prog = exir.to_edge(torch.export.export(M(), inputs, strict=True))
         lowered_gm = to_backend(
             BackendWithCompilerDemo.__name__, edge_prog.exported_program(), []
         )
diff --git a/exir/backend/test/test_backends_lifted.py b/exir/backend/test/test_backends_lifted.py
index 7e5bfa6089..3c55bebd32 100644
--- a/exir/backend/test/test_backends_lifted.py
+++ b/exir/backend/test/test_backends_lifted.py
@@ -129,7 +129,7 @@ def forward(self, x):
         sin_module = SinModule()
         model_inputs = (torch.ones(1),)
         expected_res = sin_module(*model_inputs)
-        edgeir_m = to_edge(export(sin_module, model_inputs))
+        edgeir_m = to_edge(export(sin_module, model_inputs, strict=True))

         lowered_sin_module = to_backend(
             "BackendWithCompilerDemo", edgeir_m.exported_program(), []
@@ -154,7 +154,7 @@ def forward(self, x):

         sin_module = SinModule()
         model_inputs = (torch.ones(1),)
-        edgeir_m = to_edge(export(sin_module, model_inputs))
+        edgeir_m = to_edge(export(sin_module, model_inputs, strict=True))
         max_value = model_inputs[0].shape[0]
         compile_specs = [CompileSpec("max_value", bytes([max_value]))]
         lowered_sin_module = to_backend(
@@ -174,7 +174,9 @@ def forward(self, x):

         composite_model(*model_inputs)

-        exec_prog = to_edge(export(composite_model, model_inputs)).to_executorch(
+        exec_prog = to_edge(
+            export(composite_model, model_inputs, strict=True)
+        ).to_executorch(
             config=exir.ExecutorchBackendConfig(
                 extract_delegate_segments=extract_delegate_segments
             )
@@ -248,7 +250,7 @@ def forward(self, a, x, b):
         add_mul_module = AddMulModule()
         model_inputs = (torch.ones(2, 2), 2 * torch.ones(2, 2), 3 * torch.ones(2, 2))

-        edge_graph_module = to_edge(export(add_mul_module, model_inputs))
+        edge_graph_module = to_edge(export(add_mul_module, model_inputs, strict=True))
         max_value = model_inputs[0].shape[0]
         compile_specs = [CompileSpec("max_value", bytes([max_value]))]
         lowered_add_mul = to_backend(
@@ -269,7 +271,9 @@ def forward(self, a, x, b):

         composite_model(*model_inputs)

-        exec_prog = to_edge(export(composite_model, model_inputs)).to_executorch(
+        exec_prog = to_edge(
+            export(composite_model, model_inputs, strict=True)
+        ).to_executorch(
             config=exir.ExecutorchBackendConfig(
                 extract_delegate_segments=extract_delegate_segments
             )
@@ -298,7 +302,7 @@ def forward(self, x):
         sin_module = SinModule()
         # the backend only accepts shape <= 4
         model_inputs = (torch.ones(6),)
-        edgeir_m = to_edge(export(sin_module, model_inputs))
+        edgeir_m = to_edge(export(sin_module, model_inputs, strict=True))
         max_value = model_inputs[0].shape[0]
         compile_specs = [CompileSpec("max_value", bytes([max_value]))]
         lowered_sin_module = to_backend(
@@ -318,7 +322,9 @@ def forward(self, x):

         composite_model(*model_inputs)

-        exec_prog = to_edge(export(composite_model, model_inputs)).to_executorch(
+        exec_prog = to_edge(
+            export(composite_model, model_inputs, strict=True)
+        ).to_executorch(
             config=exir.ExecutorchBackendConfig(
                 extract_delegate_segments=extract_delegate_segments
             ),
@@ -361,7 +367,7 @@ def forward(self, x):

         sin_module = SinModule()
         model_inputs = (torch.ones(1),)
-        edgeir_m = to_edge(export(sin_module, model_inputs))
+        edgeir_m = to_edge(export(sin_module, model_inputs, strict=True))
         max_value = model_inputs[0].shape[0]
         compile_specs = [CompileSpec("max_value", bytes([max_value]))]
         lowered_sin_module = to_backend(
@@ -383,7 +389,9 @@ def forward(self, x):

         composite_model(*model_inputs)

-        exec_prog = to_edge(export(composite_model, model_inputs)).to_executorch(
+        exec_prog = to_edge(
+            export(composite_model, model_inputs, strict=True)
+        ).to_executorch(
             config=exir.ExecutorchBackendConfig(
                 extract_delegate_segments=extract_delegate_segments
             ),
@@ -452,7 +460,7 @@ def forward(self, x):

         sin_module = SinModule()
         model_inputs = (torch.ones(1),)
-        edgeir_m = to_edge(export(sin_module, model_inputs))
+        edgeir_m = to_edge(export(sin_module, model_inputs, strict=True))
         error_msg = r"call_function aten.cos.default is not supported in backend BackendWithCompilerDemo"

         with self.assertRaisesRegex(
@@ -473,7 +481,7 @@ def forward(self, x):

         sin_module = SinModule()
         model_inputs = (torch.ones(1),)
-        edgeir_m = to_edge(export(sin_module, model_inputs))
+        edgeir_m = to_edge(export(sin_module, model_inputs, strict=True))
         error_msg = r"Backend FakeBackendWithCompilerDemo was not found."

         with self.assertRaisesRegex(
@@ -499,7 +507,9 @@ def forward(self, x):
         # sin_module is an nn.Module
         to_be_lowered = LowerableSubModel()
         example_input = (torch.ones(1),)
-        to_be_lowered_exir_submodule = to_edge(export(to_be_lowered, example_input))
+        to_be_lowered_exir_submodule = to_edge(
+            export(to_be_lowered, example_input, strict=True)
+        )

         max_value = example_input[0].shape[0]
         compile_specs = [CompileSpec("max_value", bytes([max_value]))]
@@ -538,7 +548,9 @@ def forward(self, x):
         # Verify the input works with eager module
         composite_model(*model_inputs)

-        exec_prog = to_edge(export(composite_model, model_inputs)).to_executorch(
+        exec_prog = to_edge(
+            export(composite_model, model_inputs, strict=True)
+        ).to_executorch(
             config=exir.ExecutorchBackendConfig(
                 extract_delegate_segments=extract_delegate_segments
             ),
@@ -598,14 +610,14 @@ def forward(self, x_raw, h, c):
         orig_res = composite_m(*inputs)

         traced = to_edge(
-            export(composite_m, inputs),
+            export(composite_m, inputs, strict=True),
             compile_config=exir.EdgeCompileConfig(
                 _check_ir_validity=False, _use_edge_ops=True
             ),
         )

         program_without_delegates = to_edge(
-            export(CompositeModel(3), inputs),
+            export(CompositeModel(3), inputs, strict=True),
             compile_config=exir.EdgeCompileConfig(
                 _check_ir_validity=False,
             ),
@@ -719,17 +731,14 @@ def forward(self, x_raw, h, c):
         orig_res = composite_m(*inputs)

         traced = to_edge(
-            export(composite_m, inputs),
+            export(composite_m, inputs, strict=True),
             compile_config=exir.EdgeCompileConfig(
                 _check_ir_validity=False, _use_edge_ops=True
             ),
         )

         program_without_delegates = to_edge(
-            export(
-                CompositeModel(3),
-                (input_x, input_h, input_c),
-            ),
+            export(CompositeModel(3), (input_x, input_h, input_c), strict=True),
             compile_config=exir.EdgeCompileConfig(
                 _check_ir_validity=False,
             ),
@@ -842,7 +851,7 @@ def forward(self, a, x, b):
         inputs = (torch.randn(2, 2), torch.randn(2, 2), torch.randn(2, 2))
         orig_res = m(*inputs)

-        ep = to_edge(export(m, inputs))
+        ep = to_edge(export(m, inputs, strict=True))
         executorch_prog = ep
         executorch_prog = executorch_prog.to_backend(AddMulPartitionerDemo())
         executorch_prog = executorch_prog.to_executorch(
@@ -899,7 +908,7 @@ def forward(self, x, y):
         inputs = (torch.randn(1, 3), torch.randn(1, 3))
         orig_res = Model()(*inputs)

-        ep = to_edge(export(Model(), inputs))
+        ep = to_edge(export(Model(), inputs, strict=True))
         executorch_prog = ep
         executorch_prog = executorch_prog.to_backend(AddAttributePartitionerDemo())
         executorch_prog = executorch_prog.to_executorch(
@@ -962,7 +971,7 @@ def partition(self, exported_program: ExportedProgram) -> PartitionResult:
                 partition_tags=partition_tags,
             )

-        ep = to_edge(export(Model(), inputs))
+        ep = to_edge(export(Model(), inputs, strict=True))
         with self.assertRaises(AssertionError):
             _ = ep.to_backend(BadPartitioner())

@@ -988,10 +997,7 @@ def test_quantized_with_delegate(self) -> None:
         # fails to trace here
         converted_linear_gm = to_edge(
-            export(
-                converted_linear,
-                example_inputs,
-            ),
+            export(converted_linear, example_inputs, strict=True),
             compile_config=exir.EdgeCompileConfig(
                 _check_ir_validity=False,
             ),
@@ -1023,12 +1029,7 @@ def forward(self, x, y):
         f = Module()
         inputs = (torch.ones(2, 2), torch.ones(2, 2))
         orig_res = f(*inputs)
-        orig = to_edge(
-            export(
-                f,
-                inputs,
-            )
-        )
+        orig = to_edge(export(f, inputs, strict=True))

         partitioned = orig
         partitioned = partitioned.to_backend(AddMulPartitionerDemo())
@@ -1077,12 +1078,7 @@ def forward(self, xs, y):
         f = Module()
         inputs = (torch.ones(2, 2), torch.ones(2, 2))
         orig_res = f(*inputs)
-        orig = to_edge(
-            export(
-                f,
-                inputs,
-            )
-        )
+        orig = to_edge(export(f, inputs, strict=True))

         partitioned = orig
         partitioned = partitioned.to_backend(AddMulPartitionerDemo())
@@ -1151,12 +1147,7 @@ def forward(self, xs, pred1, pred2, y):

         f = Module()
         orig_res = f(*inputs)
-        orig = to_edge(
-            export(
-                f,
-                inputs,
-            )
-        )
+        orig = to_edge(export(f, inputs, strict=True))

         partitioned = orig
         partitioned = partitioned.to_backend(AddMulPartitionerDemo())
@@ -1219,7 +1210,7 @@ def forward(self, x: List[torch.Tensor]):

         f = Module()
         inputs = ([torch.randn(2, 2), torch.randn(2, 2)],)
-        edge_prog = to_edge(export(f, inputs))
+        edge_prog = to_edge(export(f, inputs, strict=True))
         lowered_gm = to_backend(
             BackendWithCompilerDemo.__name__, edge_prog.exported_program(), []
         )
@@ -1232,7 +1223,7 @@ def __init__(self):
             def forward(self, x: List[torch.Tensor]):
                 return self.lowered(x)

-        gm = to_edge(export(ComposedM(), inputs))
+        gm = to_edge(export(ComposedM(), inputs, strict=True))
         gm.exported_program().module()(*inputs)

     def test_dict_input(self):
@@ -1243,7 +1234,7 @@ def forward(self, x: Dict[str, torch.Tensor]):

         f = Module()
         inputs = ({"a": torch.randn(2, 2), "b": torch.randn(2, 2)},)
-        edge_prog = to_edge(export(f, inputs))
+        edge_prog = to_edge(export(f, inputs, strict=True))
         lowered_gm = to_backend(
             BackendWithCompilerDemo.__name__, edge_prog.exported_program(), []
         )
@@ -1256,5 +1247,5 @@ def __init__(self):
             def forward(self, x: List[torch.Tensor]):
                 return self.lowered(x)

-        gm = to_edge(export(ComposedM(), inputs))
+        gm = to_edge(export(ComposedM(), inputs, strict=True))
         gm.exported_program().module()(*inputs)
diff --git a/exir/backend/test/test_compatibility.py b/exir/backend/test/test_compatibility.py
index 97f3e2b51b..9d87aa5be0 100644
--- a/exir/backend/test/test_compatibility.py
+++ b/exir/backend/test/test_compatibility.py
@@ -32,7 +32,7 @@ def forward(self, x):
         sin_module = SinModule()
         model_inputs = (torch.ones(1),)
-        edgeir_m = to_edge(export(sin_module, model_inputs))
+        edgeir_m = to_edge(export(sin_module, model_inputs, strict=True))
         max_value = model_inputs[0].shape[0]
         compile_specs = [CompileSpec("max_value", bytes([max_value]))]
         lowered_sin_module = to_backend(
diff --git a/exir/backend/test/test_graph_partition.py b/exir/backend/test/test_graph_partition.py
index 401e1c0307..87dd6dc729 100644
--- a/exir/backend/test/test_graph_partition.py
+++ b/exir/backend/test/test_graph_partition.py
@@ -25,7 +25,7 @@ def get_graph_module(
     ) -> torch.fx.GraphModule:
         graph_module = (
             to_edge(
-                export(module, inputs),
+                export(module, inputs, strict=True),
                 compile_config=EdgeCompileConfig(
                     _check_ir_validity=False,
                 ),
@@ -70,7 +70,6 @@ def extract_partition_list(
         supported_modules: List[torch.nn.Module],
         op_support: Optional[OperatorSupportBase] = None,
     ) -> List:
-
         node_list = self.get_node_list(graph_module, supported_modules)
         partition_list = generate_partitions_from_list_of_nodes(
diff --git a/exir/backend/test/test_lowered_backend_module.py b/exir/backend/test/test_lowered_backend_module.py
index 65b098f955..dcc5841bc3 100644
--- a/exir/backend/test/test_lowered_backend_module.py
+++ b/exir/backend/test/test_lowered_backend_module.py
@@ -58,7 +58,7 @@ def forward(self, *args):

         return (
             to_edge(
-                export(WrappedModule(), example_inputs),
+                export(WrappedModule(), example_inputs, strict=True),
                 compile_config=edge_compile_config,
             )
             .to_executorch()
@@ -78,10 +78,7 @@ def forward(self, x):
         model_inputs = (torch.ones(1),)
         expected_res = sin_module(*model_inputs)
         edgeir_m = to_edge(
-            export(
-                sin_module,
-                model_inputs,
-            ),
+            export(sin_module, model_inputs, strict=True),
             compile_config=exir.EdgeCompileConfig(
                 _check_ir_validity=False, _use_edge_ops=True
             ),
@@ -133,7 +130,8 @@ def test_emit_lowered_backend_module(self, unlift):
             model_inputs = model.get_random_inputs()

             edgeir_m = to_edge(
-                export(model, model_inputs), compile_config=edge_compile_config
+                export(model, model_inputs, strict=True),
+                compile_config=edge_compile_config,
             )
             lowered_model = to_backend(
                 QnnBackend.__name__, edgeir_m.exported_program(), []
@@ -189,7 +187,8 @@ def test_emit_nested_lowered_backend_module(self, unlift):
             model_inputs = model.get_random_inputs()

             edgeir_m = to_edge(
-                export(model, model_inputs), compile_config=edge_compile_config
+                export(model, model_inputs, strict=True),
+                compile_config=edge_compile_config,
             )
             lowered_module = to_backend(
                 QnnBackend.__name__, edgeir_m.exported_program(), []
@@ -206,7 +205,8 @@ def forward(self, *args):

             wrapped_module = WrappedModule(lowered_module)
             wrapped_module_edge = to_edge(
-                export(wrapped_module, model_inputs), compile_config=edge_compile_config
+                export(wrapped_module, model_inputs, strict=True),
+                compile_config=edge_compile_config,
             )

             nested_lowered_model = to_backend(
diff --git a/exir/backend/test/test_partitioner.py b/exir/backend/test/test_partitioner.py
index da1ae0444d..917dae32d7 100644
--- a/exir/backend/test/test_partitioner.py
+++ b/exir/backend/test/test_partitioner.py
@@ -77,7 +77,7 @@ def partition(
         mlp = MLP()
         example_inputs = mlp.get_random_inputs()
         model = export_for_training(mlp, example_inputs).module()
-        aten = export(model, example_inputs)
+        aten = export(model, example_inputs, strict=True)
         spec_key = "path"
         spec_value = "/a/b/c/d"
         spec = MappingProxyType({spec_key: spec_value})
@@ -138,7 +138,7 @@ def partition(
         mlp = MLP()
         example_inputs = mlp.get_random_inputs()
         model = export_for_training(mlp, example_inputs).module()
-        aten = export(model, example_inputs)
+        aten = export(model, example_inputs, strict=True)
         edge = exir.to_edge(aten)

         with self.assertRaisesRegex(
@@ -178,7 +178,7 @@ def partition(
         mlp = MLP()
         example_inputs = mlp.get_random_inputs()
         model = export_for_training(mlp, example_inputs).module()
-        edge = exir.to_edge(export(model, example_inputs))
+        edge = exir.to_edge(export(model, example_inputs, strict=True))

         with self.assertRaisesRegex(
             RuntimeError,
@@ -230,7 +230,7 @@ def partition(
             )

         model = export_for_training(self.AddConst(), (torch.ones(2, 2),)).module()
-        edge = exir.to_edge(export(model, (torch.ones(2, 2),)))
+        edge = exir.to_edge(export(model, (torch.ones(2, 2),), strict=True))
         delegated = edge.to_backend(PartitionerNoTagData())

         # Check Owning Program still owns all constant data
@@ -309,7 +309,7 @@ def partition(
             )

         model = export_for_training(self.AddConst(), (torch.ones(2, 2),)).module()
-        edge = exir.to_edge(export(model, (torch.ones(2, 2),)))
+        edge = exir.to_edge(export(model, (torch.ones(2, 2),), strict=True))
         delegated = edge.to_backend(PartitionerTagData())

         # Check Owning Program still owns all constant data
@@ -384,7 +384,7 @@ def partition(
             )

         model = export_for_training(self.AddConst(), (torch.ones(2, 2),)).module()
-        edge = exir.to_edge(export(model, (torch.ones(2, 2),)))
+        edge = exir.to_edge(export(model, (torch.ones(2, 2),), strict=True))
         delegated = edge.to_backend(PartitionerTagData())

         # Check Owning Program still owns only buffers
@@ -472,7 +472,7 @@ def partition(
         inputs = (torch.ones(2, 2),)
         model = export_for_training(ReuseConstData(), (torch.ones(2, 2),)).module()
-        edge = exir.to_edge(export(model, (torch.ones(2, 2),)))
+        edge = exir.to_edge(export(model, (torch.ones(2, 2),), strict=True))
         exec_prog = edge.to_backend(PartitionerTagData()).to_executorch()

         executorch_module = _load_for_executorch_from_buffer(exec_prog.buffer)
         inputs_flattened, _ = tree_flatten(inputs)
@@ -532,7 +532,7 @@ def partition(
             )

         model = export_for_training(ReuseConstData(), (torch.ones(2, 2),)).module()
-        edge = exir.to_edge(export(model, (torch.ones(2, 2),)))
+        edge = exir.to_edge(export(model, (torch.ones(2, 2),), strict=True))
         with self.assertRaises(RuntimeError) as error:
             _ = edge.to_backend(PartitionerTagData())

@@ -558,10 +558,7 @@ def forward(self, x):
             return y

         edge = exir.to_edge(
-            torch.export.export(
-                MutableStateModule(),
-                (torch.zeros(1),),
-            )
+            torch.export.export(MutableStateModule(), (torch.zeros(1),), strict=True)
         )
         self.assertGreater(
             len(edge.exported_program().graph_signature.buffers_to_mutate),
@@ -635,7 +632,9 @@ def forward(self, x):
         model_inputs = (torch.ones(3, 3),)
         orig_res = TestModule()(*model_inputs)

-        edge_program = exir.to_edge(torch.export.export(TestModule(), model_inputs))
+        edge_program = exir.to_edge(
+            torch.export.export(TestModule(), model_inputs, strict=True)
+        )
         lowered = edge_program.to_backend(AddAttributePartitionerDemo())

         self.assertTrue(
@@ -684,7 +683,7 @@ def forward(self, q, k_val, input_pos):

         model = Model()
         model.eval()
-        exir_program_aten = torch.export.export(model, example_inputs)
+        exir_program_aten = torch.export.export(model, example_inputs, strict=True)
         exir_program_aten.module()(*example_inputs)
         edge_program_manager = exir.to_edge(exir_program_aten)
         lowered = edge_program_manager.to_backend(AllNodesPartitionerDemo())
@@ -726,7 +725,7 @@ def forward(self, x):
         model.eval()
         example_inputs = (torch.randn(SHAPE),)

-        exir_program_aten = torch.export.export(model, example_inputs)
+        exir_program_aten = torch.export.export(model, example_inputs, strict=True)
         edge_program_manager = exir.to_edge(exir_program_aten)
         with self.assertRaises(AssertionError):
             edge_program_manager.to_backend(AddAttributePartitionerDemo())
diff --git a/exir/backend/test/test_passes.py b/exir/backend/test/test_passes.py
index 4dcc7757fa..bc18f09023 100644
--- a/exir/backend/test/test_passes.py
+++ b/exir/backend/test/test_passes.py
@@ -18,7 +18,6 @@

 class TestPasses(unittest.TestCase):
     def test_duplicate_constant_node_pass(self):
-
         class ReuseConstData(torch.nn.Module):
             def __init__(self):
                 super().__init__()
@@ -30,7 +29,9 @@ def forward(self, x):
                 return y, z

         model = export_for_training(ReuseConstData(), (torch.ones(2, 2),)).module()
-        edge = exir.to_edge(torch.export.export(model, (torch.ones(2, 2),)))
+        edge = exir.to_edge(
+            torch.export.export(model, (torch.ones(2, 2),), strict=True)
+        )

         const_nodes = [
             node.name
diff --git a/exir/backend/test/test_utils.py b/exir/backend/test/test_utils.py
index 0fc522dd68..e449809ede 100644
--- a/exir/backend/test/test_utils.py
+++ b/exir/backend/test/test_utils.py
@@ -94,20 +94,14 @@ def forward(self, x, y):

         graph_module_1: torch.fx.GraphModule = (
             to_edge(
-                export(
-                    MyModule1(),
-                    (torch.rand(3, 4), torch.rand(3, 4)),
-                )
+                export(MyModule1(), (torch.rand(3, 4), torch.rand(3, 4)), strict=True)
             )
             .exported_program()
             .graph_module
         )
         graph_module_2: torch.fx.GraphModule = (
             to_edge(
-                export(
-                    MyModule2(),
-                    (torch.rand(3, 4), torch.rand(3, 4)),
-                )
+                export(MyModule2(), (torch.rand(3, 4), torch.rand(3, 4)), strict=True)
             )
             .exported_program()
             .graph_module
@@ -131,10 +125,7 @@ def forward(self, x):

         large_model = (
             to_edge(
-                export(
-                    LargeModel(),
-                    inputs,
-                ),
+                export(LargeModel(), inputs, strict=True),
                 compile_config=exir.EdgeCompileConfig(_check_ir_validity=False),
             )
             .exported_program()
@@ -143,7 +134,7 @@ def forward(self, x):

         pattern = (
             to_edge(
-                export(torch.nn.Linear(3, 3), inputs),
+                export(torch.nn.Linear(3, 3), inputs, strict=True),
                 compile_config=exir.EdgeCompileConfig(_check_ir_validity=False),
             )
             .exported_program()
@@ -179,10 +170,7 @@ def partition(
             )

         exported_program = to_edge(
-            export(
-                torch.nn.Linear(3, 3),
-                (torch.randn(3, 3),),
-            )
+            export(torch.nn.Linear(3, 3), (torch.randn(3, 3),), strict=True)
         )

         error_msg = r"needs a `partition_tags` field containing a mapping of tags to delegate spec"
@@ -216,7 +204,7 @@ def forward(self, a, x, b):
         m = Model()
         inputs = (torch.randn(2, 2), torch.randn(2, 2), torch.randn(2, 2))

-        edge = to_edge(export(m, inputs))
+        edge = to_edge(export(m, inputs, strict=True))
         edge = edge.to_backend(AddMulPartitionerDemo())

         number_of_cpu_nodes = get_non_lowered_nodes(edge.exported_program().graph)
         # Only sub is not not lowerable
@@ -237,7 +225,7 @@ def forward(self, a, x, b):
         m = Model()
         inputs = (torch.randn(2, 2), torch.randn(2, 2), torch.randn(2, 2))

-        edge = to_edge(export(m, inputs))
+        edge = to_edge(export(m, inputs, strict=True))
         edge = edge.to_backend(AddMulPartitionerDemo())

         number_of_delegates = get_delegates(edge.exported_program().graph)
         # there will be 2 delegates: (mm + add) -> sub -> (mm + add)
@@ -259,7 +247,9 @@ def forward(self, a, x, b):

         m = Model()
         inputs = (torch.randn(2, 2), torch.randn(2, 2), torch.randn(2, 2))
-        edge = to_edge(export(m, inputs)).to_backend(AddMulPartitionerDemo())
+        edge = to_edge(export(m, inputs, strict=True)).to_backend(
+            AddMulPartitionerDemo()
+        )

         graph_str = format_delegated_graph(edge.exported_program().graph_module)
         self.assertIn(
diff --git a/exir/capture/_capture.py b/exir/capture/_capture.py
index 3c72256a33..975191f074 100644
--- a/exir/capture/_capture.py
+++ b/exir/capture/_capture.py
@@ -210,10 +210,11 @@ def capture(  # noqa: C901
                 cast(torch.nn.Module, f.__self__),
                 args,
                 dynamic_shapes=dynamic_shapes,
+                strict=True,
             )
         else:
             mod = f if isinstance(f, torch.nn.Module) else WrapperModule(f)
-            ep = export(mod, args, dynamic_shapes=dynamic_shapes)
+            ep = export(mod, args, dynamic_shapes=dynamic_shapes, strict=True)
         ep = ep.run_decompositions(_default_decomposition_table())
         ep = _transform(ep, ReplaceViewOpsWithViewCopyOpsPass())
diff --git a/exir/emit/test/test_emit.py b/exir/emit/test/test_emit.py
index fc10c1db66..6aea0297f9 100644
--- a/exir/emit/test/test_emit.py
+++ b/exir/emit/test/test_emit.py
@@ -154,12 +154,7 @@ def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
         f = Foo()

         program = (
-            to_edge(
-                export(
-                    f,
-                    (torch.ones(3, 2), torch.zeros(3, 2)),
-                )
-            )
+            to_edge(export(f, (torch.ones(3, 2), torch.zeros(3, 2)), strict=True))
             .to_executorch()
             .executorch_program
         )
@@ -180,7 +175,9 @@ def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
     def test_basic_end_to_end(self) -> None:
         f = models.BasicSinMax()
         program = (
-            to_edge(export(f, f.get_random_inputs())).to_executorch().executorch_program
+            to_edge(export(f, f.get_random_inputs(), strict=True))
+            .to_executorch()
+            .executorch_program
         )
         exec_plan = program.execution_plan[0]
         ops = exec_plan.operators
@@ -210,7 +207,7 @@ def forward(
         f = Foo()
         x = (torch.randn(100),)

-        program = to_edge(export(f, x)).to_executorch().executorch_program
+        program = to_edge(export(f, x, strict=True)).to_executorch().executorch_program
         exec_plan = program.execution_plan[0]
         self.assertEqual(len(exec_plan.outputs), 4)
         self.assertEqual(len(exec_plan.inputs), 1)
@@ -230,7 +227,7 @@ class M(torch.nn.Module):
             def forward(self, x):
                 return [((1, 3, 1.2), True, [x + x, x * x], None)]

-        ep = torch.export.export(M(), (torch.ones(2, 3),))
+        ep = torch.export.export(M(), (torch.ones(2, 3),), strict=True)
         res = ep.module()(torch.ones(2, 3))
         self.assertEqual(res[0][0], (1, 3, 1.2))
         program = to_edge(ep).to_executorch().executorch_program
@@ -251,7 +248,7 @@ class M(torch.nn.Module):
             def forward(self, x, y, z):
                 return x + y, x + x, x + y + z

-        ep = torch.export.export(M(), (torch.ones(2, 3), 2, True))
+        ep = torch.export.export(M(), (torch.ones(2, 3), 2, True), strict=True)
         ep.module()(torch.ones(2, 3), 2, True)
         program = to_edge(ep).to_executorch().executorch_program
         inputs = program.execution_plan[0].inputs
@@ -270,7 +267,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         f = Foo()

         inputs = (torch.ones((10, 10)),)
-        edge = to_edge(export(f, inputs))
+        edge = to_edge(export(f, inputs, strict=True))

         removed_ops = ["aten::relu_", "aten::view"]
         expected_ops = [
@@ -319,7 +316,11 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:

         inputs = (torch.ones(2, 2),)

-        program = to_edge(export(model, inputs)).to_executorch().executorch_program
+        program = (
+            to_edge(export(model, inputs, strict=True))
+            .to_executorch()
+            .executorch_program
+        )

         self.assertEqual(len(program.execution_plan[0].operators), 2)

@@ -333,9 +334,8 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         f = Foo()

         program = (
-            to_edge(export(f, (torch.randn(2, 3, 5),)))
-            .to_executorch()
-            .executorch_program
+            to_edge(export(f, (torch.randn(2, 3, 5),), strict=True))
+            .to_executorch().executorch_program
         )
         exir.print_program.pretty_print(program)

@@ -359,7 +359,9 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         f = Foo()

         program = (
-            to_edge(export(f, (torch.randn(3, 5),))).to_executorch().executorch_program
+            to_edge(export(f, (torch.randn(3, 5),), strict=True))
+            .to_executorch()
+            .executorch_program
         )
         # The value for beta should appear before alpha
         self.assertEqual(program.execution_plan[0].values[12].val, Int(3))
@@ -378,7 +380,9 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         f = Foo()

         x, _ = torch.sort(torch.randn(3, 4))
-        program = to_edge(export(f, (x,))).to_executorch().executorch_program
+        program = (
+            to_edge(export(f, (x,), strict=True)).to_executorch().executorch_program
+        )
         # The value for right should appear before side
         self.assertEqual(program.execution_plan[0].values[6].val, Bool(False))
         self.assertEqual(program.execution_plan[0].values[7].val, Bool(True))
@@ -402,7 +406,7 @@ def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
         f = Foo()

         program = (
-            to_edge(export(f, (torch.ones(3), torch.ones(3))))
+            to_edge(export(f, (torch.ones(3), torch.ones(3)), strict=True))
             .to_executorch()
             .executorch_program
         )
@@ -429,7 +433,11 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         inputs = (torch.ones(2, 2, dtype=torch.int32),)

         # Trace to FX Graph.
-        program = to_edge(export(model_out, inputs)).to_executorch().executorch_program
+        program = (
+            to_edge(export(model_out, inputs, strict=True))
+            .to_executorch()
+            .executorch_program
+        )

         self.assertEqual(len(program.execution_plan[0].chains[0].instructions), 2)
         self._assertCallLength(program, 0, 4)
@@ -449,7 +457,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         h = Foo()
         x = (torch.randn(3, 2),)

-        exec_prog = to_edge(export(h, x)).to_executorch(
+        exec_prog = to_edge(export(h, x, strict=True)).to_executorch(
             exir.ExecutorchBackendConfig(emit_stacktrace=True)
         )
         program = exec_prog.executorch_program
@@ -497,7 +505,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         h = Hoo()
         x = (torch.randn(3, 2),)

-        program = to_edge(export(h, x)).to_executorch().executorch_program
+        program = to_edge(export(h, x, strict=True)).to_executorch().executorch_program

         # Check the stacktrace is None since we did not specify to get the stacktrace
         self.assertTrue(program.execution_plan[0].chains[0].stacktrace is None)
@@ -512,9 +520,10 @@ def forward(self, x: torch.Tensor, n: torch.Tensor) -> torch.Tensor:

         x = torch.randn(3, 2)
         program = (
-            to_edge(export(f, (x, x)))
+            to_edge(export(f, (x, x), strict=True))
             # .to_edge(self.compile_config)  # TODO(larryliu): fix cat
-            .to_executorch().executorch_program
+            .to_executorch()
+            .executorch_program
         )

         self.assertEqual(len(program.execution_plan[0].chains[0].instructions), 1)
@@ -529,7 +538,7 @@ def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
         f = Foo()

         x = (torch.randn(10),)
-        program = to_edge(export(f, x)).to_executorch().executorch_program
+        program = to_edge(export(f, x, strict=True)).to_executorch().executorch_program
         self._assertCallLength(program, 0, 8)

     def test_emit_layout(self) -> None:
@@ -540,7 +549,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         f = Foo()

         x = (torch.randn(3, 2),)
-        program = to_edge(export(f, x)).to_executorch().executorch_program
+        program = to_edge(export(f, x, strict=True)).to_executorch().executorch_program

         vals = program.execution_plan[0].values
         for val in vals:
@@ -560,7 +569,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         x = (torch.triu(torch.ones(2, 2)),)
         program = (
             to_edge(
-                export(f, x),
+                export(f, x, strict=True),
                 compile_config=exir.EdgeCompileConfig(_check_ir_validity=False),
             )
             .to_executorch()
@@ -578,7 +587,9 @@ def forward(self, x):
             return torch.nn.functional.interpolate(x, scale_factor=2)

         x = (torch.randn(1, 1, 2, 2),)
-        program = to_edge(export(M(), x)).to_executorch().executorch_program
+        program = (
+            to_edge(export(M(), x, strict=True)).to_executorch().executorch_program
+        )
         self.assertIsInstance(
             program.execution_plan[0].values[-1].val, schema.OptionalTensorList
         )
@@ -600,7 +611,9 @@ def false_fn(y: torch.Tensor) -> torch.Tensor:
                 ret = control_flow.cond(pred, true_fn, false_fn, [x])
                 return ret

-        module = to_edge(export(M(), (torch.tensor(True), torch.ones(2, 2))))
+        module = to_edge(
+            export(M(), (torch.tensor(True), torch.ones(2, 2)), strict=True)
+        )
         program = module.to_executorch().executorch_program

         num_mm = 0
@@ -635,7 +648,7 @@ def map_fn(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:

         inputs = (torch.ones(4, 4), torch.ones(4))
         module = to_edge(
-            export(f, inputs),
+            export(f, inputs, strict=True),
compile_config=exir.EdgeCompileConfig(_check_ir_validity=False), ) program = module.to_executorch().executorch_program @@ -708,7 +721,7 @@ def map_fn(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor: inputs = (torch.ones(4, 4), torch.ones(4)) module = to_edge( - export(f, inputs), + export(f, inputs, strict=True), compile_config=exir.EdgeCompileConfig(_check_ir_validity=False), ) _load_for_executorch_from_buffer(module.to_executorch().buffer) @@ -725,7 +738,7 @@ def map_fn(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor: inputs = (torch.ones(4, 4), torch.ones(4)) module = to_edge( - export(f, inputs), + export(f, inputs, strict=True), compile_config=exir.EdgeCompileConfig(_check_ir_validity=False), ) buffer = module.to_executorch().buffer @@ -746,7 +759,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: inputs = (torch.ones(10, 5),) program = ( to_edge( - export(model, inputs), + export(model, inputs, strict=True), compile_config=exir.EdgeCompileConfig(_check_ir_validity=False), ) .to_executorch() @@ -790,12 +803,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: f = Add() - edge_program_manager = to_edge( - export( - f, - (torch.ones(3, 2),), - ) - ) + edge_program_manager = to_edge(export(f, (torch.ones(3, 2),), strict=True)) edge_program_manager._edge_programs["forward"] = constant_prop_pass( edge_program_manager.exported_program() ) @@ -805,12 +813,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: .non_const_buffer_sizes ) - edge_program_manager = to_edge( - export( - f, - (torch.ones(3, 2),), - ) - ) + edge_program_manager = to_edge(export(f, (torch.ones(3, 2),), strict=True)) non_const_buffer_size_without_const_prop_pass = ( edge_program_manager.to_executorch() .executorch_program.execution_plan[0] @@ -889,7 +892,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: inputs = (torch.ones(10, 5),) try: to_edge( - export(model, inputs), + export(model, inputs, strict=True), compile_config=exir.EdgeCompileConfig(_check_ir_validity=False), ).to_executorch() except: @@ -908,7 +911,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: inputs = (torch.ones(10, 5, 2, 1),) with self.assertRaises(InternalError): to_edge( - export(model, inputs), + export(model, inputs, strict=True), compile_config=exir.EdgeCompileConfig( _check_ir_validity=False, _skip_dim_order=True ), @@ -916,7 +919,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: # Success if you use dim_order to_edge( - export(model, inputs), + export(model, inputs, strict=True), compile_config=exir.EdgeCompileConfig( _check_ir_validity=False, _skip_dim_order=False ), @@ -939,12 +942,12 @@ def forward_sigmoid(self, x: torch.Tensor) -> torch.Tensor: inputs = (torch.ones(10, 5),) with patch_forward(model, model.forward_relu): program_relu = to_edge( - export(model, inputs), + export(model, inputs, strict=True), compile_config=exir.EdgeCompileConfig(_check_ir_validity=False), ).to_executorch() with patch_forward(model, model.forward_sigmoid): program_sigmoid = to_edge( - export(model, inputs), + export(model, inputs, strict=True), compile_config=exir.EdgeCompileConfig(_check_ir_validity=False), ).to_executorch() exir_input = { @@ -1003,9 +1006,11 @@ def forward_sigmoid(self, x: torch.Tensor) -> torch.Tensor: model = SimpleLinear() inputs = (torch.ones(10, 5),) with patch_forward(model, model.forward_relu): - program_relu = to_edge(export(model, inputs)).to_executorch() + program_relu = to_edge(export(model, inputs, strict=True)).to_executorch() with patch_forward(model, model.forward_sigmoid): - 
program_sigmoid = to_edge(export(model, inputs)).to_executorch() + program_sigmoid = to_edge( + export(model, inputs, strict=True) + ).to_executorch() exir_input = { "forward_relu": program_relu.exported_program(), "forward_sigmoid": program_sigmoid.exported_program(), @@ -1056,10 +1061,7 @@ def make_program( inputs, ) -> "ExecutorchProgramManager": return to_edge( - export( - WrapperModule(fn), - inputs, - ) + export(WrapperModule(fn), inputs, strict=True) ).to_executorch() program_a = make_program(model.a, inputs) @@ -1106,11 +1108,7 @@ def forward(self, k: torch.Tensor) -> torch.Tensor: k = torch.rand(2, 4) dim0_k = Dim("dim0_k", max=3) dynamic_shapes = {"k": {0: dim0_k}} - captured = export( - func, - (k,), - dynamic_shapes=dynamic_shapes, - ) + captured = export(func, (k,), dynamic_shapes=dynamic_shapes, strict=True) edge = to_edge(captured) from executorch.exir.passes import MemoryPlanningPass @@ -1158,7 +1156,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: model = Simple() inputs = (torch.ones(10, 5),) - program = to_edge(export(model, inputs)).to_executorch() + program = to_edge(export(model, inputs, strict=True)).to_executorch() exir_input = { "forward": program.exported_program(), } @@ -1232,7 +1230,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: self.assertEqual(len(merged_program.execution_plan[4].outputs), 2) merged_program = to_edge( - export(model, inputs), constant_methods=getters + export(model, inputs, strict=True), constant_methods=getters ).to_executorch() executorch_module = _load_for_executorch_from_buffer(merged_program.buffer) torch.allclose(executorch_module.run_method("get_tensor", [])[0], tensor_output) @@ -1243,10 +1241,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: def test_emit_debug_handle_map(self) -> None: mul_model = Mul() program_mul = to_edge( - export( - mul_model, - mul_model.get_random_inputs(), - ) + export(mul_model, mul_model.get_random_inputs(), strict=True) ).to_executorch() # this triggers the actual emission of the graph program_mul._emitter_output.program @@ -1263,10 +1258,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: mul_model = SimpleAddMul() program_mul = to_edge( - export( - mul_model, - (torch.ones(2, 2),), - ) + export(mul_model, (torch.ones(2, 2),), strict=True) ).to_executorch() # this triggers the actual emission of the graph @@ -1317,7 +1309,7 @@ def forward(self, x): inputs = ([torch.ones(2, 2), torch.ones(2, 2)],) model = TestModel() - edgeir_m = to_edge(export(model, inputs)) + edgeir_m = to_edge(export(model, inputs, strict=True)) lowered_module = to_backend( "BackendWithCompilerExample", edgeir_m.exported_program(), [] ) @@ -1332,7 +1324,7 @@ def forward(self, list_a): composite_model = CompositeModule() exec_prog = to_edge( - export(composite_model, inputs), + export(composite_model, inputs, strict=True), ).to_executorch() exec_prog.buffer @@ -1359,7 +1351,7 @@ def forward(self, input): # a, x, b): model_inputs = ((torch.ones(2, 2), 2 * torch.ones(2, 2), 3 * torch.ones(2, 2)),) model = AddMulModule() - edgeir_m = to_edge(export(model, model_inputs)) + edgeir_m = to_edge(export(model, model_inputs, strict=True)) lowered_module = to_backend( "BackendWithCompilerExample", edgeir_m.exported_program(), [] ) @@ -1374,7 +1366,7 @@ def forward(self, list_a): composite_model = CompositeModule() exec_prog = to_edge( - export(composite_model, model_inputs), + export(composite_model, model_inputs, strict=True), ).to_executorch() exec_prog.buffer @@ -1401,7 +1393,7 @@ def forward(self, x, y): inputs = 
(torch.ones(2, 2), torch.ones(2, 2)) model = TestModel() - edgeir_m = to_edge(export(model, inputs)) + edgeir_m = to_edge(export(model, inputs, strict=True)) lowered_module = to_backend( "BackendWithCompilerExample", edgeir_m.exported_program(), [] ) @@ -1416,7 +1408,7 @@ def forward(self, x, y): composite_model = CompositeModule() exec_prog = to_edge( - export(composite_model, inputs), + export(composite_model, inputs, strict=True), ).to_executorch() # Reading the program triggers the call to emit_program underneath which # we need to be done for our test to succeed. @@ -1449,12 +1441,7 @@ def forward(self, x): self.assertEqual(model.W1.untyped_storage().nbytes(), 8) self.assertEqual(model.W2.nbytes, 4) self.assertEqual(model.W2.untyped_storage().nbytes(), 8) - program = to_edge( - export( - model, - (torch.ones(1),), - ) - ).to_executorch() + program = to_edge(export(model, (torch.ones(1),), strict=True)).to_executorch() program = program._emitter_output.program # each emitted weight is not a view @@ -1471,12 +1458,7 @@ def forward(self, x): return x + self.buf model = NonPersistentBuffer() - program = to_edge( - export( - model, - (torch.ones(1),), - ) - ).to_executorch() + program = to_edge(export(model, (torch.ones(1),), strict=True)).to_executorch() program = program._emitter_output.program # confirm that the buffer was emitted self.assertEqual(len(program.constant_buffer), 2) @@ -1494,10 +1476,7 @@ def forward(self, x): model = LiftedConstants() program = to_edge( - export( - model, - (torch.ones(3, 2),), - ) + export(model, (torch.ones(3, 2),), strict=True) ).to_executorch() program = program._emitter_output.program @@ -1527,12 +1506,7 @@ def forward(self, x): self.state.add_(1) return y - model = to_edge( - export( - MutableStateModule(), - (torch.zeros(1),), - ) - ) + model = to_edge(export(MutableStateModule(), (torch.zeros(1),), strict=True)) model = model.to_executorch() model.dump_executorch_program(True) self.assertTrue( @@ -1563,12 +1537,7 @@ def forward(self, x): self.state.add_(1) return y - model = to_edge( - export( - MutableStateModule(), - (torch.zeros(1),), - ) - ) + model = to_edge(export(MutableStateModule(), (torch.zeros(1),), strict=True)) model = model.to_executorch( config=ExecutorchBackendConfig( memory_planning_pass=MemoryPlanningPass(alloc_graph_input=False), @@ -1594,12 +1563,7 @@ def forward(self, x): masked_weights = x.masked_fill(self.mask == 0, float("-inf")) return masked_weights - model = to_edge( - export( - InfinityMaskModel(), - (torch.randn(2, 2),), - ) - ) + model = to_edge(export(InfinityMaskModel(), (torch.randn(2, 2),), strict=True)) # Confirm that we can serialize the model with infinity in it. 
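# A minimal aside, assuming only core torch semantics (nothing
# ExecuTorch-specific): masked_fill writes real IEEE-754 infinities into
# the tensor, e.g.
#
#     torch.tensor([0.0]).masked_fill(torch.tensor([True]), float("-inf"))
#     # -> tensor([-inf])
#
# so the constant data emitted for this model has to round-trip -inf
# intact rather than clamping or dropping it.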
model = model.to_executorch() @@ -1623,7 +1587,7 @@ def forward(self, x): x.add_(1) model = to_edge( - export(MutateInputTensorModule(), (torch.zeros(1),)) + export(MutateInputTensorModule(), (torch.zeros(1),), strict=True) ).to_executorch( config=ExecutorchBackendConfig( memory_planning_pass=MemoryPlanningPass(alloc_graph_input=False) @@ -1643,7 +1607,9 @@ def __init__(self): def forward(self, x): return self.linear(x) - model = to_edge(export(LinearModule(), (torch.ones(5, 5),))).to_executorch( + model = to_edge( + export(LinearModule(), (torch.ones(5, 5),), strict=True) + ).to_executorch( config=ExecutorchBackendConfig( external_constants=True, ) diff --git a/exir/program/test/test_fake_program.py b/exir/program/test/test_fake_program.py index 15959efde4..5ad5d102b4 100644 --- a/exir/program/test/test_fake_program.py +++ b/exir/program/test/test_fake_program.py @@ -30,8 +30,7 @@ def forward(self, arg) -> torch.Tensor: linear = Linear() exported_program = export( - linear, - args=(torch.randn(10, 10),), + linear, args=(torch.randn(10, 10),), strict=True ).run_decompositions() return exported_program diff --git a/exir/program/test/test_program.py b/exir/program/test/test_program.py index d07972f971..046ad03e75 100644 --- a/exir/program/test/test_program.py +++ b/exir/program/test/test_program.py @@ -166,11 +166,9 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: torch.ones(1), torch.zeros(1), ), + strict=True, ).run_decompositions() - programs["foo"] = export( - foo, - (torch.ones(1),), - ).run_decompositions() + programs["foo"] = export(foo, (torch.ones(1),), strict=True).run_decompositions() return programs @@ -289,7 +287,9 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: return x * 3.14 mul = Mul() - ep = to_edge(torch.export.export(mul, (torch.ones(1),))).exported_program() + ep = to_edge( + torch.export.export(mul, (torch.ones(1),), strict=True) + ).exported_program() for node in ep.graph.nodes: self.assertNotEqual(node.op, "get_attr") self.assertEqual( @@ -306,7 +306,7 @@ def forward(self, x, y): torch._check(z < 4) return x[z : z + y.shape[0]] - ep = torch.export.export(M(), (torch.randn(10), torch.tensor([3]))) + ep = torch.export.export(M(), (torch.randn(10), torch.tensor([3])), strict=True) edge_manager = to_edge( ep, compile_config=exir.EdgeCompileConfig(_check_ir_validity=False) @@ -350,7 +350,6 @@ def test_edge_manager_transform(self): ) def test_issue_3659(self): - class Mul(torch.nn.Module): def __init__(self): super(Mul, self).__init__() @@ -371,7 +370,10 @@ def get_dynamic_shapes(self): model = Mul() ep = torch.export.export( - model, model.get_example_inputs(), dynamic_shapes=model.get_dynamic_shapes() + model, + model.get_example_inputs(), + dynamic_shapes=model.get_dynamic_shapes(), + strict=True, ) to_edge( @@ -549,7 +551,7 @@ def _test_edge_dialect_verifier( if not isinstance(callable, torch.nn.Module): callable = WrapperModule(callable) - exported_foo = export(callable, inputs) + exported_foo = export(callable, inputs, strict=True) _ = to_edge(exported_foo, compile_config=edge_compile_config) def test_edge_dialect_custom_op(self): @@ -697,7 +699,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: from torch._export.verifier import SpecViolationError input = torch.arange(9, dtype=torch.float) - 4 - ep = torch.export.export(LinalgNorm(), (input,)) + ep = torch.export.export(LinalgNorm(), (input,), strict=True) # aten::linalg_norm is not a core op, so it should error out with self.assertRaises(SpecViolationError): @@ -744,7 +746,7 @@ def 
count_nodes(graph_module, target): def test_to_edge_with_single_preserved_op(self): model = TestLinear() - program = torch.export.export(model, model._get_random_inputs()) + program = torch.export.export(model, model._get_random_inputs(), strict=True) ops_not_to_decompose = [ torch.ops.aten.linear.default, @@ -759,7 +761,7 @@ def test_to_edge_with_single_preserved_op(self): def test_to_edge_with_partial_ops_preserved(self): model = TestLinearSDPACombined() - program = torch.export.export(model, model._get_random_inputs()) + program = torch.export.export(model, model._get_random_inputs(), strict=True) ops_not_to_decompose = [ torch.ops.aten.linear.default, @@ -774,7 +776,7 @@ def test_to_edge_with_partial_ops_preserved(self): def test_to_edge_with_multiple_ops_preserved(self): model = TestLinearSDPACombined() - program = torch.export.export(model, model._get_random_inputs()) + program = torch.export.export(model, model._get_random_inputs(), strict=True) ops_not_to_decompose = [ torch.ops.aten.linear.default, @@ -791,7 +793,7 @@ def test_to_edge_with_multiple_ops_preserved(self): def test_to_edge_with_preserved_ops_not_in_model(self): model = TestSDPA() - program = torch.export.export(model, model._get_random_inputs()) + program = torch.export.export(model, model._get_random_inputs(), strict=True) ops_not_to_decompose = [ torch.ops.aten.linear.default, @@ -806,7 +808,7 @@ def test_to_edge_with_preserved_ops_not_in_model(self): def test_save_fails(self): model = TestLinear() - program = torch.export.export(model, model._get_random_inputs()) + program = torch.export.export(model, model._get_random_inputs(), strict=True) edge = to_edge(program) et = edge.to_executorch() with self.assertRaises(ValueError): diff --git a/exir/tests/models.py b/exir/tests/models.py index d3b68485b9..b4939ecfb0 100644 --- a/exir/tests/models.py +++ b/exir/tests/models.py @@ -173,10 +173,7 @@ def get_random_inputs(self) -> Tuple[Tensor, Tensor]: delegated_m = DelegateAdd() edge_ir_m = to_edge( - export( - delegated_m, - delegated_m.get_random_inputs(), - ) + export(delegated_m, delegated_m.get_random_inputs(), strict=True) ) lowered_module = LoweredBackendModule( edge_program=edge_ir_m.exported_program(), diff --git a/exir/tests/test_arg_validator.py b/exir/tests/test_arg_validator.py index a22544d37a..d85ef81b90 100644 --- a/exir/tests/test_arg_validator.py +++ b/exir/tests/test_arg_validator.py @@ -31,7 +31,7 @@ def forward(self, x): m = TestModel() inputs = (torch.randn(1, 3, 100, 100).to(dtype=torch.int),) - egm = to_edge(export(m, inputs)).exported_program().graph_module + egm = to_edge(export(m, inputs, strict=True)).exported_program().graph_module validator = EdgeOpArgValidator(egm) validator.run(*inputs) self.assertEqual(len(validator.violating_ops), 0) @@ -49,7 +49,7 @@ def forward(self, x): inputs = (torch.randn(1, 3, 100, 100).to(dtype=torch.bfloat16),) egm = ( to_edge( - export(M(), inputs), + export(M(), inputs, strict=True), compile_config=EdgeCompileConfig(_check_ir_validity=False), ) .exported_program() diff --git a/exir/tests/test_delegate.py b/exir/tests/test_delegate.py index 713e4b0941..d89d3f2bbd 100644 --- a/exir/tests/test_delegate.py +++ b/exir/tests/test_delegate.py @@ -45,7 +45,7 @@ def g(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor: return x + y inputs = (torch.ones(1, 3), torch.ones(1, 3)) - edge_ir_m = to_edge(export(WrapperModule(g), inputs)) + edge_ir_m = to_edge(export(WrapperModule(g), inputs, strict=True)) lowered_module: LoweredBackendModule = LoweredBackendModule( 
edge_ir_m.exported_program(), "BackendWithCompilerDemo", b"moo", [] ) @@ -54,10 +54,7 @@ def f(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor: return torch.ops.higher_order.executorch_call_delegate(lowered_module, x, y) orig_res = f(*inputs) - gm = export( - WrapperModule(f), - inputs, - ) + gm = export(WrapperModule(f), inputs, strict=True) FileCheck().check("lowered_module_0").check( "torch.ops.higher_order.executorch_call_delegate" ).run(gm.graph_module.code) @@ -69,7 +66,7 @@ def test_to_backend(self) -> None: m = models.CompositeDelegateModule() exec_prog = to_edge( - export(m, m.get_random_inputs()), + export(m, m.get_random_inputs(), strict=True), compile_config=EdgeCompileConfig(_check_ir_validity=False), ).to_executorch() # TODO(larryliu): fix split_copy.Tensor graph_module = exec_prog.exported_program().graph_module @@ -165,7 +162,7 @@ def forward(self, x, y): return x orig_res = Model()(*inputs) - prog = to_edge(export(Model(), inputs)) + prog = to_edge(export(Model(), inputs, strict=True)) gm = prog.exported_program().graph_module node_list = [] @@ -225,7 +222,7 @@ def forward(self, x, y): return x orig_res = Model()(*inputs) - prog = to_edge(export(Model(), inputs)) + prog = to_edge(export(Model(), inputs, strict=True)) gm = prog.exported_program().graph_module node_list = [] @@ -284,7 +281,7 @@ def forward(self, x, y): return x orig_res = Model()(*inputs) - prog = to_edge(export(Model(), inputs)) + prog = to_edge(export(Model(), inputs, strict=True)) gm = prog.exported_program().graph_module node_list = [] diff --git a/exir/tests/test_dynamic_shape_propagation.py b/exir/tests/test_dynamic_shape_propagation.py index 01c2f2b29a..3dbdf0b5f4 100644 --- a/exir/tests/test_dynamic_shape_propagation.py +++ b/exir/tests/test_dynamic_shape_propagation.py @@ -22,7 +22,12 @@ def test_repeat(self): inputs = inputs[0], inputs[1] prog = to_edge( - export(eager_model, inputs, dynamic_shapes=eager_model.get_dynamic_shape()), + export( + eager_model, + inputs, + dynamic_shapes=eager_model.get_dynamic_shape(), + strict=True, + ), compile_config=exir.EdgeCompileConfig(_check_ir_validity=False), ) @@ -48,7 +53,7 @@ def test_unbacked_symint(self): inputs = inputs[0], inputs[1] prog = to_edge( - export(eager_model, inputs, dynamic_shapes=None), + export(eager_model, inputs, dynamic_shapes=None, strict=True), compile_config=exir.EdgeCompileConfig(_check_ir_validity=False), ) new_prog = prog.transform([SpecPropPass(), HintBasedSymShapeEvalPass()]) diff --git a/exir/tests/test_memory_format_ops_pass.py b/exir/tests/test_memory_format_ops_pass.py index 0292cf98f5..76e994abdb 100644 --- a/exir/tests/test_memory_format_ops_pass.py +++ b/exir/tests/test_memory_format_ops_pass.py @@ -269,7 +269,7 @@ def call_operator(self, op, args, kwargs, meta): _to_dim_order_op_str = "executorch_exir_dialects_edge__ops_dim_order_ops__to_dim_order_copy_default" before_epm = to_edge( - export(toy_model, sample_input), + export(toy_model, sample_input, strict=True), compile_config=EdgeCompileConfig(_skip_dim_order=False), ) diff --git a/exir/tests/test_memory_format_ops_pass_utils.py b/exir/tests/test_memory_format_ops_pass_utils.py index 3049f30a8c..8bf810e847 100644 --- a/exir/tests/test_memory_format_ops_pass_utils.py +++ b/exir/tests/test_memory_format_ops_pass_utils.py @@ -104,7 +104,9 @@ class MemoryFormatOpsPassTestUtils: def memory_format_test_runner( test_class: unittest.TestCase, test_set: MemoryFormatTestSet ): - before = export(test_set.module, test_set.sample_input).run_decompositions({}) + before = export( 
+ test_set.module, test_set.sample_input, strict=True + ).run_decompositions({}) if test_set.use_xnnpack: epm = to_edge_transform_and_lower( diff --git a/exir/tests/test_memory_planning.py b/exir/tests/test_memory_planning.py index 5e4573a2ba..1f94f0341f 100644 --- a/exir/tests/test_memory_planning.py +++ b/exir/tests/test_memory_planning.py @@ -239,12 +239,7 @@ def wrapper(self: "TestMemoryPlanning") -> None: # torch._tensor.Tensor]` is not a function. inputs = eager_module.get_random_inputs() graph_module = ( - to_edge( - export( - eager_module, - inputs, - ) - ) + to_edge(export(eager_module, inputs, strict=True)) .exported_program() .graph_module ) @@ -491,10 +486,7 @@ def test_multiple_pools( expected_bufsizes: List[int], ) -> None: edge_program = to_edge( - export( - MultiplePoolsToyModel(), - (torch.ones(1),), - ) + export(MultiplePoolsToyModel(), (torch.ones(1),), strict=True) ) edge_program.to_executorch( @@ -538,7 +530,8 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: return torch.nn.functional.sigmoid(self.linear(x) + self.constant + 1) def count_planned_inputs( - nodes: List[Node], graph_signature: Any # pyre-ignore + nodes: List[Node], + graph_signature: Any, # pyre-ignore ) -> Tuple[int, int]: num_mem_planned_placeholders = 0 num_placeholders = 0 @@ -555,7 +548,9 @@ def count_planned_inputs( model = Simple() inputs = (torch.randn(5, 5),) - ep_no_input_planning = to_edge(export(model, inputs)).to_executorch( + ep_no_input_planning = to_edge( + export(model, inputs, strict=True) + ).to_executorch( config=ExecutorchBackendConfig( memory_planning_pass=MemoryPlanningPass(alloc_graph_input=False), sym_shape_eval_pass=ConstraintBasedSymShapeEvalPass(), @@ -575,7 +570,7 @@ def count_planned_inputs( 5, # x, self.constant, linear weight, linear bias, '1' scalar promoted to tensor ) - ep_input_planning = to_edge(export(model, inputs)).to_executorch( + ep_input_planning = to_edge(export(model, inputs, strict=True)).to_executorch( config=ExecutorchBackendConfig( memory_planning_pass=MemoryPlanningPass(alloc_graph_input=True), sym_shape_eval_pass=ConstraintBasedSymShapeEvalPass(), @@ -609,7 +604,7 @@ def forward(self, a, b, x): model = TestModel() example_inputs = (torch.rand(1, 6, 2), torch.rand(1, 6, 2), torch.randn(5, 5)) - exported_model = torch.export.export(model, example_inputs) + exported_model = torch.export.export(model, example_inputs, strict=True) edge = to_edge(exported_model) class TestPass(ExportPass): diff --git a/exir/tests/test_passes.py b/exir/tests/test_passes.py index 4d2c17086b..ff076a7345 100644 --- a/exir/tests/test_passes.py +++ b/exir/tests/test_passes.py @@ -133,12 +133,7 @@ def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor: int_tensor = torch.tensor([[1, 2, 3]]) float_tensor = torch.tensor([[1.0, 2.0, 3.0]]) - edge_prog = to_edge( - export( - add, - (int_tensor, float_tensor), - ) - ) + edge_prog = to_edge(export(add, (int_tensor, float_tensor), strict=True)) new_prog = edge_prog.transform([RemoveMixedTypeOperators()]) new_graph_module = new_prog.exported_program().graph_module @@ -161,7 +156,7 @@ def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor: double_tensor = torch.tensor([[1.0, 2.0, 3.0]]) double_tensor = double_tensor.to(torch.double) - double_prog = to_edge(export(add, (int_tensor, double_tensor))) + double_prog = to_edge(export(add, (int_tensor, double_tensor), strict=True)) double_prog.transform([RemoveMixedTypeOperators()]) new_graph_module_double = double_prog.exported_program().graph_module @@ -188,12 +183,7 
@@ def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor: mult = Mult() float_tensor_vert = float_tensor.T - mult_prog = to_edge( - export( - mult, - (int_tensor, float_tensor_vert), - ) - ) + mult_prog = to_edge(export(mult, (int_tensor, float_tensor_vert), strict=True)) # graph_module_mult.graph.print_tabular() @@ -224,10 +214,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: # Turn off functionalization so that we can get the actual to.dtype op edge_prog = to_edge( - export( - foo, - (torch.ones(1, dtype=torch.float32),), - ) + export(foo, (torch.ones(1, dtype=torch.float32),), strict=True) ) edge_prog = edge_prog.transform([RemoveNoopPass()]) self.assertIsNotNone(edge_prog.exported_program().graph_module) @@ -257,36 +244,21 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: # Turn off functionalization so that we can get the actual to.dtype op x = torch.ones((3, 8, 8)) - prog = to_edge( - export( - foo_with_no_slice, - (x,), - ) - ) + prog = to_edge(export(foo_with_no_slice, (x,), strict=True)) prog = prog.transform([RemoveNoopPass()]) new_graph_module = prog.exported_program().graph_module FileCheck().check_count( "executorch_exir_dialects_edge__ops_aten_slice_copy_Tensor", 0, exactly=True ).run(new_graph_module.code) - prog = to_edge( - export( - foo_with_one_slice, - (x,), - ) - ) + prog = to_edge(export(foo_with_one_slice, (x,), strict=True)) prog = prog.transform([RemoveNoopPass()]) new_graph_module = prog.exported_program().graph_module FileCheck().check_count( "executorch_exir_dialects_edge__ops_aten_slice_copy_Tensor", 1, exactly=True ).run(new_graph_module.code) - prog = to_edge( - export( - foo_with_all_slices, - (x,), - ) - ) + prog = to_edge(export(foo_with_all_slices, (x,), strict=True)) prog = prog.transform([RemoveNoopPass()]) new_graph_module = prog.exported_program().graph_module FileCheck().check_count( @@ -302,12 +274,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: x = (torch.randn(2, 3),) - to_edge( - export( - f, - x, - ) - ).exported_program().graph_module + to_edge(export(f, x, strict=True)).exported_program().graph_module # TODO(angelayi): Add a utility function that verifies a model is in # the edge dialect @@ -335,12 +302,8 @@ def forward(self, x_raw, h, c): composite_m = CompositeModel(3) edge_prog = to_edge( - export( - composite_m, - inputs, - ) + export(composite_m, inputs, strict=True), # torch._ops.aten.t.default - , compile_config=exir.EdgeCompileConfig(_check_ir_validity=False), ) @@ -380,10 +343,7 @@ def get_random_inputs(self): model = MyModel() inputs = model.get_random_inputs() prog = to_edge( - export( - model, - inputs, - ), + export(model, inputs, strict=True), compile_config=EdgeCompileConfig(_check_ir_validity=False), ) # TODO(larryliu): fix split_copy new_gm_res = ToOutVarPass()(prog.exported_program().graph_module) @@ -415,10 +375,7 @@ def get_random_inputs(self): model = MyModel() inputs = model.get_random_inputs() prog = to_edge( - export( - model, - inputs, - ), + export(model, inputs, strict=True), compile_config=EdgeCompileConfig(_check_ir_validity=False), ) # TODO(larryliu): fix topk new_gm_res = ToOutVarPass()(prog.exported_program().graph_module) @@ -449,12 +406,7 @@ def forward(self, x): inputs = torch.tensor(1.0, dtype=torch.float) model_res = model(inputs) - edge_dialect = to_edge( - export( - model, - (inputs,), - ) - ) + edge_dialect = to_edge(export(model, (inputs,), strict=True)) edge_res = edge_dialect.exported_program().module()(inputs) self.assertTrue(torch.allclose(model_res, edge_res)) @@ 
-470,10 +422,7 @@ class NullPass(ExportPass): pass prog = to_edge( - export( - f, - (torch.ones(3, 2),), - ), + export(f, (torch.ones(3, 2),), strict=True), compile_config=EdgeCompileConfig(_check_ir_validity=False), ) # TODO(larryliu): fix cat new_prog = prog.transform([NullPass()]) @@ -502,10 +451,7 @@ class NullPass(ExportPass): pass prog = to_edge( - export( - f, - (torch.ones(3, 2),), - ), + export(f, (torch.ones(3, 2),), strict=True), compile_config=exir.EdgeCompileConfig(_check_ir_validity=False), ) new_prog = prog.transform([NullPass()]) @@ -529,7 +475,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: mul = Mul() - expo_prog = to_edge(export(mul, (torch.ones(1),))) + expo_prog = to_edge(export(mul, (torch.ones(1),), strict=True)) new_prog = expo_prog.transform([ScalarToTensorPass()]) self.assertIsNotNone(new_prog.exported_program().graph_module) new_graph_module = new_prog.exported_program().graph_module @@ -561,12 +507,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: example_inputs = (torch.randn(2, 3, 4, 5),) - gm = to_edge( - export( - f, - example_inputs, - ) - ) + gm = to_edge(export(f, example_inputs, strict=True)) new_gm = gm.transform( [ReplaceSymSizeOpPass(), ScalarToTensorPass(), RemoveMixedTypeOperators()] ) @@ -587,12 +528,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: f = Foo() gm = ( - to_edge( - export( - f, - (torch.ones(3, 2),), - ) - ) + to_edge(export(f, (torch.ones(3, 2),), strict=True)) .exported_program() .graph_module ) @@ -616,12 +552,7 @@ def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor]: f = Foo() gm = ( - to_edge( - export( - f, - (torch.ones(3, 2),), - ) - ) + to_edge(export(f, (torch.ones(3, 2),), strict=True)) .exported_program() .graph_module ) @@ -655,10 +586,7 @@ def forward(self, inp: torch.Tensor) -> torch.Tensor: # ReplaceBrokenOpsWithFunctionalOpsPass is used in to_edge() prog = to_edge( - export( - f, - (x,), - ), + export(f, (x,), strict=True), compile_config=exir.EdgeCompileConfig(_check_ir_validity=False), ) gm = prog.exported_program().graph_module @@ -681,9 +609,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: prog = to_edge( export( - f, - (torch.ones(3, 2),), - dynamic_shapes={"x": {0: dim_x}}, + f, (torch.ones(3, 2),), dynamic_shapes={"x": {0: dim_x}}, strict=True ), compile_config=exir.EdgeCompileConfig(_check_ir_validity=False), ) @@ -703,10 +629,7 @@ def test_alloc_node_spec(self) -> None: eager_model = FTMapBasic() inputs = eager_model.get_random_inputs() prog = to_edge( - export( - eager_model, - inputs, - ), + export(eager_model, inputs, strict=True), compile_config=exir.EdgeCompileConfig(_check_ir_validity=False), ) passes = [ @@ -755,10 +678,7 @@ def test_debug_pass_file_log(self) -> None: def test_dce_recursive(self) -> None: eager_model = FTCondDeadCode() inputs = eager_model.get_random_inputs() - gm = export( - eager_model, - inputs, - ).graph_module + gm = export(eager_model, inputs, strict=True).graph_module self.assertTrue(torch.ops.aten.sub.Tensor in collect_ops(gm)) dead_code_elimination_pass(gm) @@ -776,10 +696,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: f = Foo() prog = to_edge( - export( - f, - (torch.rand(5),), - ), + export(f, (torch.rand(5),), strict=True), # missing dispatch key compile_config=exir.EdgeCompileConfig(_check_ir_validity=False), ).transform(propagate_dynamic_shape()) @@ -807,9 +724,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: dim_x = torch.export.Dim("dim_x", max=3) prog = to_edge( export( - f, - (torch.ones(3, 2),), - 
dynamic_shapes={"x": {0: dim_x}}, + f, (torch.ones(3, 2),), dynamic_shapes={"x": {0: dim_x}}, strict=True ), compile_config=exir.EdgeCompileConfig(_check_ir_validity=False), ) @@ -839,16 +754,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: f = Foo() - gm = ( - to_edge( - export( - f, - (x,), - ) - ) - .exported_program() - .graph_module - ) + gm = to_edge(export(f, (x,), strict=True)).exported_program().graph_module for node in gm.graph.nodes: if node.op == "call_function": self.assertEqual(type(node.target), EdgeOpOverload) @@ -871,6 +777,7 @@ def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor: torch.randn(2, 2), torch.randn(2, 2), ), + strict=True, ) # should look like: # graph(): @@ -934,6 +841,7 @@ def call_operator(self, op, args, kwargs, meta): torch.randn(2, 2), torch.randn(2, 2), ), + strict=True, ) ) # Retrace-able, the graph "promote" back to ATen dialect, showing up add and relu, which is expected. @@ -946,12 +854,7 @@ def test_debug_handle_generator_pass(self) -> None: inputs = eager_model.get_random_inputs() graph_module = ( - to_edge( - export( - eager_model, - inputs, - ) - ) + to_edge(export(eager_model, inputs, strict=True)) .exported_program() .graph_module ) @@ -965,12 +868,7 @@ def test_generate_missing_debug_handles(self) -> None: eager_model = MLP(2, output_size=4) inputs = eager_model.get_random_inputs() - ep = to_edge( - export( - eager_model, - inputs, - ) - ).exported_program() + ep = to_edge(export(eager_model, inputs, strict=True)).exported_program() list(ep.graph.nodes)[0].meta.pop("debug_handle") self.assertTrue(list(ep.graph.nodes)[0].meta.get("debug_handle") is None) @@ -1021,12 +919,7 @@ def forward( torch.ones(2, 2), ) - ep = to_edge( - export( - f, - inputs, - ) - ).exported_program() + ep = to_edge(export(f, inputs, strict=True)).exported_program() graph_module = ep.graph_module def check_debug_handle_metadata(graph_module: torch.fx.GraphModule) -> None: @@ -1061,9 +954,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: dim_x = torch.export.Dim("dim_x", max=3) prog = to_edge( export( - f, - (torch.ones(3, 2),), - dynamic_shapes={"x": {0: dim_x}}, + f, (torch.ones(3, 2),), dynamic_shapes={"x": {0: dim_x}}, strict=True ), compile_config=exir.EdgeCompileConfig(_check_ir_validity=False), ) @@ -1093,10 +984,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: f = Foo() gm = to_edge( - export( - f, - (torch.randn(5),), - ), + export(f, (torch.randn(5),), strict=True), compile_config=exir.EdgeCompileConfig(_check_ir_validity=False), ) new_gm = gm.transform([RemoveGraphAssertsPass()]) @@ -1117,12 +1005,7 @@ def __init__(self): def forward(self, x): return torch.arange(start=0, end=2) + x - _ = to_edge( - export( - M(), - (torch.randn(2),), - ) - ).to_executorch() + _ = to_edge(export(M(), (torch.randn(2),), strict=True)).to_executorch() def test_replace_slice(self) -> None: class M(torch.nn.Module): @@ -1134,12 +1017,7 @@ def forward(self, x): return self.a[:2] + x gm = ( - to_edge( - export( - M(), - (torch.randn(2),), - ) - ) + to_edge(export(M(), (torch.randn(2),), strict=True)) .exported_program() .graph_module ) @@ -1155,7 +1033,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: add = Add() edge = to_edge( - export(add, (torch.ones(1),)), + export(add, (torch.ones(1),), strict=True), compile_config=EdgeCompileConfig(_skip_dim_order=False), ) edge = edge.transform([ScalarToTensorPass(), RemoveMixedTypeOperators()]) @@ -1193,10 +1071,7 @@ def forward(self, x): c = torch.cat([self.a, b]) return (c + c) + x - aten = 
export( - M(), - (torch.zeros(2, 2, 3),), - ) + aten = export(M(), (torch.zeros(2, 2, 3),), strict=True) self.assertEqual(count_additions(aten.graph_module), 3) new_ep = constant_prop_pass(aten) self.assertEqual(count_additions(new_ep.graph_module), 1) @@ -1217,10 +1092,7 @@ def forward(self, x): c = torch.cat([self.a, b]) return (c + c) + x - aten = export( - M(), - (torch.zeros(2, 2, 3),), - ) + aten = export(M(), (torch.zeros(2, 2, 3),), strict=True) # Input signature will have two entries: # (1) parameter `a` and (2) user input `x`. self.assertEqual(len(aten.graph_signature.input_specs), 2) @@ -1298,7 +1170,7 @@ def forward(self, query, key, value): m = convert_pt2e(m) # export, perform constant propagation to make weights const - aten_prog = export(m, (query, key, value)) + aten_prog = export(m, (query, key, value), strict=True) aten_prog = constant_prop_pass(aten_prog) # lower to edge dialect @@ -1332,10 +1204,7 @@ def forward(self, x): slice_tensor = torch.slice_copy(self.a, dim=0, start=0, end=1) return torch.cat([x, slice_tensor]) - aten = export( - M(), - (torch.zeros(2, 2, 2),), - ) + aten = export(M(), (torch.zeros(2, 2, 2),), strict=True) self.assertIn("a", aten.state_dict) self.assertEqual(count_slice(aten.graph_module), 1) @@ -1360,10 +1229,7 @@ def forward(self, x, y): # y is unused. return x + self.a - aten = export( - M(), - (torch.zeros(3, 2, 4), torch.zeros(3, 2, 4)), - ) + aten = export(M(), (torch.zeros(3, 2, 4), torch.zeros(3, 2, 4)), strict=True) self.assertIn("a", aten.state_dict) self.assertEqual(count_placeholder(aten.graph_module), 3) @@ -1401,7 +1267,7 @@ def forward(self, pred, x): x = torch.randn([3, 3]) pred = torch.tensor(x[0][0].item() < 0) edge = to_edge( - export(mod, (pred, x)), + export(mod, (pred, x), strict=True), compile_config=exir.EdgeCompileConfig(_check_ir_validity=False), ) error_msg = r"constant_prop_pass for control flow is not supported yet." @@ -1429,12 +1295,7 @@ def forward(self, x): self.state.add_(1) return y - model = to_edge( - export( - MutableStateModule(), - (torch.zeros(1),), - ) - ) + model = to_edge(export(MutableStateModule(), (torch.zeros(1),), strict=True)) self.assertEqual(count_copies(model.exported_program().graph_module), 0) # Before # graph(): @@ -1516,7 +1377,7 @@ def quantize_model( quantizer.set_global(quantization_config) m = prepare_pt2e(m, quantizer) # pyre-fixme[6] m = convert_pt2e(m, fold_quantize=True) - ep = torch.export.export(m, example_inputs) + ep = torch.export.export(m, example_inputs, strict=True) dq_nodes_pre = count_dq_nodes(ep.graph_module) q_nodes_pre = count_q_nodes(ep.graph_module) edge = to_edge( @@ -1573,7 +1434,7 @@ def forward(self, x): model = TestDqQ() m_eager = model.eval() - ep = torch.export.export(m_eager, (torch.randn(9, 8),)) + ep = torch.export.export(m_eager, (torch.randn(9, 8),), strict=True) edge = to_edge(ep) # Check that the dq and q nodes are not touched by the RemoveNoopPass. self.assertTrue( @@ -1606,7 +1467,7 @@ def forward(self, x): model = TestDqQDifferentQParam() m_eager = model.eval() - ep = torch.export.export(m_eager, (torch.randn(9, 8),)) + ep = torch.export.export(m_eager, (torch.randn(9, 8),), strict=True) edge = to_edge(ep) print(edge.exported_program().graph_module.graph) # Check that the dq and q nodes are not touched by the RemoveNoopPass. 
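A minimal numeric sketch of why RemoveNoopPass must leave these dq/q pairs alone (made-up scales and zero points, assuming standard affine quantization; the variable names are illustrative, not from this patch):

    import torch

    # dequantize with scale=0.5, zp=0, then requantize with scale=0.25, zp=0
    x = torch.tensor([1, 2, 3], dtype=torch.uint8)
    deq = (x.to(torch.float32) - 0) * 0.5  # [0.5, 1.0, 1.5]
    req = torch.clamp((deq / 0.25).round() + 0, 0, 255).to(torch.uint8)  # [2, 4, 6]
    # A dq -> q pair is numerically a no-op only when the qparams match,
    # so the pass conservatively keeps both nodes.
    assert not torch.equal(req, x)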
@@ -1630,7 +1491,6 @@ def forward(self, x): ) def test_normalize_view_copy_base_pass(self) -> None: - class ViewChain(torch.nn.Module): def forward(self, x): x = torch.ops.aten.view_copy.default(x, [30, 1]) @@ -1645,7 +1505,7 @@ def is_view_copy(node: torch.fx.Node) -> bool: and node.target == torch.ops.aten.view_copy.default ) - gm = export(ViewChain(), (torch.ones(30),)).graph_module + gm = export(ViewChain(), (torch.ones(30),), strict=True).graph_module # Check before transformation n_view_copy_before = 0 @@ -1680,7 +1540,6 @@ def is_view_copy(node: torch.fx.Node) -> bool: self.assertEqual(n_view_copy_bases_after, 0) def test_replace_view_copy_with_view_pass(self) -> None: # noqa: C901 - # Helper functions def is_view_copy(node: torch.fx.Node) -> bool: return ( @@ -1704,10 +1563,7 @@ def forward(self, x): # a computation before the end of the graph. return torch.ops.aten.add.Tensor(o1, o2) - ep = torch.export.export( - TestViewCopies(), - args=(torch.ones(1),), - ) + ep = torch.export.export(TestViewCopies(), args=(torch.ones(1),), strict=True) for node in ep.graph.nodes: if node.op == "placeholder": node.meta["spec"] = TensorSpec.from_tensor(torch.empty(1)) @@ -1809,10 +1665,7 @@ def _do_checks( input = torch.randn([2, 3, 4, 5]).to(memory_format=torch.contiguous_format) # 1. vanilla export, no edge ops - ep = export( - m, - (input,), - ).run_decompositions({}) + ep = export(m, (input,), strict=True).run_decompositions({}) _do_checks( ep.graph_module.code, aten_op_str, diff --git a/exir/tests/test_print_program.py b/exir/tests/test_print_program.py index c53ca4c376..9440450a9c 100644 --- a/exir/tests/test_print_program.py +++ b/exir/tests/test_print_program.py @@ -39,7 +39,7 @@ def forward(self, x): warp_model = WrapModule() example_inputs = (torch.rand(1, 32, 16, 16),) - exir_exported_program = to_edge(export(warp_model, example_inputs)) + exir_exported_program = to_edge(export(warp_model, example_inputs, strict=True)) number_of_stack_trace = 0 for node in exir_exported_program.exported_program().graph.nodes: node_info = inspect_node( diff --git a/exir/tests/test_quant_fusion_pass.py b/exir/tests/test_quant_fusion_pass.py index d14e85b496..bb829688bc 100644 --- a/exir/tests/test_quant_fusion_pass.py +++ b/exir/tests/test_quant_fusion_pass.py @@ -57,7 +57,7 @@ def forward(self, x, y): ) m = _convert_to_reference_decomposed_fx(m) config = EdgeCompileConfig(_check_ir_validity=False) - m = to_edge(export(m, example_inputs), compile_config=config) + m = to_edge(export(m, example_inputs, strict=True), compile_config=config) # QuantFusionPass should be part of to_executorch() config, separating it out so that we can check the graph. m = m.transform([QuantFusionPass(_fix_node_meta_val=True)]) # check that we are using functional variant of q/dq/add @@ -96,7 +96,7 @@ def forward(self, x, y): m(*example_inputs) m = _convert_to_reference_decomposed_fx(m) config = EdgeCompileConfig(_check_ir_validity=False) - m = to_edge(export(m, example_inputs), compile_config=config) + m = to_edge(export(m, example_inputs, strict=True), compile_config=config) # QuantFusionPass should be part of to_executorch() config, separating it out so that we can check the graph. 
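# What the pass is doing here, inferred from the FileCheck patterns in this
# file rather than stated by the patch: QuantFusionPass rewrites
#
#     dequantize_per_tensor -> aten op (add/reshape/slice/cat/...) -> quantize_per_tensor
#
# chains into fused quantized_decomposed ops, which to_executorch() can then
# swap for out-variants such as torch.ops.quantized_decomposed.add.out.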
m = m.transform([QuantFusionPass(_fix_node_meta_val=True)]) # check that we are using functional variant of q/dq/add/reshape @@ -151,7 +151,7 @@ def forward(self, x, y): ) m = _convert_to_reference_decomposed_fx(m) config = EdgeCompileConfig(_check_ir_validity=False) - m = to_edge(export(m, example_inputs), compile_config=config) + m = to_edge(export(m, example_inputs, strict=True), compile_config=config) # QuantFusionPass should be part of to_executorch() config, separating it out so that we can check the graph. m = m.transform([QuantFusionPass(_fix_node_meta_val=True)]) # check that we are using functional variant of q/dq/add/slice @@ -163,9 +163,7 @@ def forward(self, x, y): exactly=True, ).check("executorch_exir_dialects_edge__ops_aten_slice_copy_Tensor").check( "executorch_exir_dialects_edge__ops_quantized_decomposed_quantize_per_tensor_default" - ).check( - "executorch_exir_dialects_edge__ops_aten_slice_copy_Tensor" - ).check( + ).check("executorch_exir_dialects_edge__ops_aten_slice_copy_Tensor").check( "executorch_exir_dialects_edge__ops_quantized_decomposed_add_default" ).check( "executorch_exir_dialects_edge__ops_quantized_decomposed_dequantize_per_tensor_default" @@ -177,7 +175,9 @@ def forward(self, x, y): # check that we are using out variant of add and slice_copy FileCheck().check("torch.ops.quantized_decomposed.add.out").check( "torch.ops.aten.slice_copy.Tensor_out" - ).run(m.exported_program().graph_module.code) + ).run( + m.exported_program().graph_module.code + ) def test_cat(self) -> None: class M(torch.nn.Module): @@ -198,7 +198,7 @@ def forward(self, x, y): m(*example_inputs) m = _convert_to_reference_decomposed_fx(m) config = EdgeCompileConfig(_check_ir_validity=False) - m = to_edge(export(m, example_inputs), compile_config=config) + m = to_edge(export(m, example_inputs, strict=True), compile_config=config) # QuantFusionPass should be part of to_executorch() config, separating it out so that we can check the graph. m = m.transform([QuantFusionPass()]) # check that we are using functional variant of q/dq/cat @@ -293,7 +293,9 @@ def forward(self, indices): _check_ir_validity=False, _use_edge_ops=True, ) - m = to_edge(export(m, example_inputs), compile_config=compile_config) + m = to_edge( + export(m, example_inputs, strict=True), compile_config=compile_config + ) # QuantFusionPass should be part of to_executorch() config, separating it out so that we can check the graph. m = m.transform([QuantFusionPass(_fix_node_meta_val=True)]) # check that we are using functional variant of q/dq/cat @@ -349,7 +351,9 @@ def forward(self, indices): _check_ir_validity=False, _use_edge_ops=True, ) - m = to_edge(export(m, example_inputs), compile_config=compile_config) + m = to_edge( + export(m, example_inputs, strict=True), compile_config=compile_config + ) # QuantFusionPass should be part of to_executorch() config, separating it out so that we can check the graph. 
m = m.transform([QuantFusionPass(_fix_node_meta_val=True)]) # check that we are using functional variant of q/dq/cat diff --git a/exir/tests/test_quantization.py b/exir/tests/test_quantization.py index 269a9ee11b..148d7f4f9d 100644 --- a/exir/tests/test_quantization.py +++ b/exir/tests/test_quantization.py @@ -71,7 +71,7 @@ def test_resnet(self) -> None: _check_ir_validity=False, ) m = to_edge( - export(m, example_inputs), compile_config=compile_config + export(m, example_inputs, strict=True), compile_config=compile_config ).transform([QuantFusionPass(), SpecPropPass()]) after_quant_result = m.exported_program().module()(*example_inputs)[0] diff --git a/exir/tests/test_remove_view_copy.py b/exir/tests/test_remove_view_copy.py index 318dc085b4..b13fabede1 100644 --- a/exir/tests/test_remove_view_copy.py +++ b/exir/tests/test_remove_view_copy.py @@ -44,7 +44,7 @@ def test_disable(self) -> None: model = TestModel1() model.eval() example_inputs = model.get_example_inputs() - ep = torch.export.export(model, example_inputs) + ep = torch.export.export(model, example_inputs, strict=True) etpm = to_edge(ep).to_executorch( config=ExecutorchBackendConfig( remove_view_copy=False, @@ -59,7 +59,7 @@ def test_output_matches(self) -> None: model = TestModel1() model.eval() example_inputs = model.get_example_inputs() - ep = torch.export.export(model, example_inputs) + ep = torch.export.export(model, example_inputs, strict=True) epm_remove = to_edge(ep) epm_no_remove = copy.deepcopy( @@ -96,7 +96,7 @@ def test_spec(self) -> None: model = TestModel1() model.eval() example_inputs = model.get_example_inputs() - ep = torch.export.export(model, example_inputs) + ep = torch.export.export(model, example_inputs, strict=True) etpm = to_edge(ep).to_executorch( config=ExecutorchBackendConfig( diff --git a/exir/tests/test_serde.py b/exir/tests/test_serde.py index 2c68920ff3..5b09ddf07c 100644 --- a/exir/tests/test_serde.py +++ b/exir/tests/test_serde.py @@ -49,7 +49,7 @@ def check_ep( # pyre-ignore def check_serde(self, m, inputs, check_executorch=True) -> None: - aten = export(m, inputs) + aten = export(m, inputs, strict=True) aten_new = deserialize(serialize(aten)) self.check_ep(aten, aten_new, inputs) @@ -135,7 +135,7 @@ def forward(self, x): sin_module = SinModule() model_inputs = (torch.ones(1),) - edgeir_m = to_edge(export(sin_module, model_inputs)) + edgeir_m = to_edge(export(sin_module, model_inputs, strict=True)) max_value = model_inputs[0].shape[0] compile_specs = [CompileSpec("max_value", bytes([max_value]))] lowered_sin_module = to_backend( @@ -155,7 +155,7 @@ def forward(self, x): composite_model(*model_inputs) - edge = to_edge(export(composite_model, model_inputs)) + edge = to_edge(export(composite_model, model_inputs, strict=True)) edge_new = deserialize(serialize(edge.exported_program())) self.check_ep(edge.exported_program(), edge_new, model_inputs) @@ -197,7 +197,7 @@ def forward(self, a, x, b): m = Model() inputs = (torch.randn(2, 2), torch.randn(2, 2), torch.randn(2, 2)) - ep = to_edge(export(m, inputs)) + ep = to_edge(export(m, inputs, strict=True)) edge = ep.to_backend(AddMulPartitionerDemo()) edge_new = deserialize(serialize(edge.exported_program())) self.check_ep(edge.exported_program(), edge_new, inputs) @@ -217,7 +217,7 @@ def forward(self, x): inputs = (torch.randn(1, 1, 32, 32),) metadata = () - edge = to_edge(export(m, inputs)) + edge = to_edge(export(m, inputs, strict=True)) for node in edge.exported_program().graph_module.graph.nodes: if "convolution" in str(node.target): metadata = ( 
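A note on the mechanical change this stack applies throughout; the patch itself gives no rationale, so this reading is an inference: torch.export.export() takes a strict flag, where strict=True traces the module with TorchDynamo and strict=False uses the non-strict Python-level tracer, and pinning strict=True keeps these tests on Dynamo-based tracing even if the upstream default ever changes. A minimal, self-contained sketch with a made-up module:

    import torch
    from torch.export import export

    class AddOne(torch.nn.Module):  # hypothetical stand-in for the test modules
        def forward(self, x: torch.Tensor) -> torch.Tensor:
            return x + 1

    # strict=True pins TorchDynamo-based (strict) export; leaving it implicit
    # would silently follow whatever default torch.export ships with.
    ep = export(AddOne(), (torch.ones(2, 2),), strict=True)
    print(ep.graph)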
diff --git a/exir/tests/test_tracer.py b/exir/tests/test_tracer.py index 415443c4c1..594e760ab3 100644 --- a/exir/tests/test_tracer.py +++ b/exir/tests/test_tracer.py @@ -107,7 +107,10 @@ def forward(self, x): return x + y ep = torch.export.export( - M(), (torch.ones(3),), dynamic_shapes={"x": {0: torch.export.Dim("x")}} + M(), + (torch.ones(3),), + dynamic_shapes={"x": {0: torch.export.Dim("x")}}, + strict=True, ) exir.to_edge(ep) diff --git a/exir/tests/test_verification.py b/exir/tests/test_verification.py index c223e0ad84..f18e9d74b7 100644 --- a/exir/tests/test_verification.py +++ b/exir/tests/test_verification.py @@ -35,7 +35,7 @@ def f(x: torch.Tensor) -> torch.Tensor: # Generate program program = ( - to_edge(export(WrapperModule(f), (torch.randn(2),))) + to_edge(export(WrapperModule(f), (torch.randn(2),), strict=True)) .transform( [ ConstPropPass(), @@ -90,7 +90,9 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: model1 = Op1() inputs = (torch.ones(2, 2),) program = ( - to_edge(export(model1, inputs)).to_executorch()._emitter_output.program + to_edge(export(model1, inputs, strict=True)) + .to_executorch() + ._emitter_output.program ) # Initialize and test Interpreter -- assert that the operators are same as above @@ -104,7 +106,9 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: model2 = Op2() inputs = (torch.ones(2, 2),) program = ( - to_edge(export(model2, inputs)).to_executorch()._emitter_output.program + to_edge(export(model2, inputs, strict=True)) + .to_executorch() + ._emitter_output.program ) # Initialize and test Interpreter -- assert that the operators are same as above @@ -135,7 +139,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: # Generate a program with Op2's operations (remainder, div, add) model2 = Op2() inputs = torch.ones(2, 2) - exec_prog = to_edge(export(model2, (inputs,))).to_executorch() + exec_prog = to_edge(export(model2, (inputs,), strict=True)).to_executorch() exported_prog = exec_prog.exported_program() res = exported_prog.module()(inputs)[0] # noqa @@ -158,8 +162,7 @@ def forward(self, x): egm = ( to_edge( export( - m, - (torch.randn(1, 3, 100, 100).to(dtype=torch.int),), + m, (torch.randn(1, 3, 100, 100).to(dtype=torch.int),), strict=True ) ) .exported_program() @@ -184,6 +187,7 @@ def forward(self, x, weight, bias): export( m, (torch.rand(16, 8, 32, 32), torch.rand(8), torch.rand(8)), + strict=True, ) ) .exported_program() @@ -202,16 +206,7 @@ def forward(self, x): return torch._to_cpu(x) m = TestModel() - egm = ( - to_edge( - export( - m, - ([],), - ) - ) - .exported_program() - .graph_module - ) + egm = to_edge(export(m, ([],), strict=True)).exported_program().graph_module verifier = EXIREdgeDialectVerifier() verifier(egm) self.assertTrue(verifier.is_valid(egm)) @@ -228,8 +223,7 @@ def forward(self, x): m = TestModel() egm = export( - m, - (torch.randn(1, 3, 100, 100).to(dtype=torch.int),), + m, (torch.randn(1, 3, 100, 100).to(dtype=torch.int),), strict=True ).graph_module verifier = EXIREdgeDialectVerifier() with self.assertRaises(SpecViolationError): @@ -247,8 +241,7 @@ def forward(self, x): egm = ( to_edge( export( - m, - (torch.randn(1, 3, 100, 100).to(dtype=torch.int),), + m, (torch.randn(1, 3, 100, 100).to(dtype=torch.int),), strict=True ) ) .exported_program() @@ -267,6 +260,7 @@ def test_edge_sad_with_edge_ops(self) -> None: export( m, (torch.randn(1, 3, 100, 100).to(dtype=torch.bfloat16),), + strict=True, ) ) .exported_program() diff --git a/exir/verification/test/test_verifier.py b/exir/verification/test/test_verifier.py 
index 1ee48ef4d4..369f976076 100644
--- a/exir/verification/test/test_verifier.py
+++ b/exir/verification/test/test_verifier.py
@@ -44,7 +44,7 @@ def forward(self, x, y):
             torch._check(z < 4)
             return x[z : z + y.shape[0]]
 
-        ep = torch.export.export(M(), (torch.randn(10), torch.tensor([3])))
+        ep = torch.export.export(M(), (torch.randn(10), torch.tensor([3])), strict=True)
 
         compile_config_with_disable_ir_validity = EdgeCompileConfig(
             _check_ir_validity=False
@@ -82,7 +82,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
 
         example_input = (torch.zeros([2, 2]),)
 
-        export_model = export(m, example_input)
+        export_model = export(m, example_input, strict=True)
 
         compile_config_without_edge_op = EdgeCompileConfig(
             _use_edge_ops=False, _skip_dim_order=False
@@ -131,7 +131,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
             ),
         )
 
-        export_model = export(m, example_input)
+        export_model = export(m, example_input, strict=True)
 
         compile_config_with_dim_order = EdgeCompileConfig(_skip_dim_order=False)
         compile_config_with_stride = EdgeCompileConfig(_skip_dim_order=True)
diff --git a/extension/android_test/add_model.py b/extension/android_test/add_model.py
index 5c7cf4770e..b7ac3955ee 100644
--- a/extension/android_test/add_model.py
+++ b/extension/android_test/add_model.py
@@ -13,7 +13,7 @@ def forward(self, x: torch.Tensor, y: torch.Tensor):
 
 
 # 1. torch.export: Defines the program with the ATen operator set.
-aten_dialect = export(Add(), (torch.ones(1), torch.ones(1)))
+aten_dialect = export(Add(), (torch.ones(1), torch.ones(1)), strict=True)
 
 # 2. to_edge: Make optimizations for Edge devices
 edge_program = to_edge(aten_dialect)
diff --git a/extension/llm/modules/test/test_attention.py b/extension/llm/modules/test/test_attention.py
index cda9becd69..82ee1febf4 100644
--- a/extension/llm/modules/test/test_attention.py
+++ b/extension/llm/modules/test/test_attention.py
@@ -150,6 +150,7 @@ def test_attention_export(self):
             (self.x, self.x),
             kwargs={"input_pos": self.input_pos},
             dynamic_shapes=self.dynamic_shapes,
+            strict=True,
         )
         et_res = et_mha_ep.module()(self.x, self.x, input_pos=self.input_pos)
         tt_res = self.tt_mha(self.x, self.x, input_pos=self.input_pos)
@@ -196,6 +197,7 @@ def test_attention_executorch(self):
             (self.x, self.x),
             kwargs={"input_pos": self.input_pos},
             dynamic_shapes=self.dynamic_shapes,
+            strict=True,
         )
         et_program = to_edge(
             et_mha_ep,
diff --git a/extension/llm/modules/test/test_position_embeddings.py b/extension/llm/modules/test/test_position_embeddings.py
index 039cc798b1..15da2335d7 100644
--- a/extension/llm/modules/test/test_position_embeddings.py
+++ b/extension/llm/modules/test/test_position_embeddings.py
@@ -49,7 +49,6 @@ def test_tile_positional_embedding_smoke(self):
         self.assertTrue(torch.allclose(y, ref_y))
 
     def test_tile_positional_embedding_export(self):
-
         tpe_ep = torch.export.export(
             self.tpe,
             (self.x, self.aspect_ratio),
@@ -57,6 +56,7 @@ def test_tile_positional_embedding_export(self):
                 self.dynamic_shape,
                 None,
             ),  # assuming aspect ratio is static
+            strict=True,
         )
 
         y = tpe_ep.module()(self.x, self.aspect_ratio)
@@ -91,6 +91,7 @@ def test_tile_positional_embedding_et(self):
                 self.dynamic_shape,
                 None,
             ),  # assuming aspect ratio is static
+            strict=True,
         )
         et_program = to_edge(
             tpe_ep,
@@ -148,7 +149,6 @@ def test_tiled_token_positional_embedding_smoke(self):
         assert_close(y, ref_y)
 
     def test_tiled_token_positional_embedding_export(self):
-
         tpe_ep = torch.export.export(
             self.tpe,
             (self.x, self.aspect_ratio),
@@ -156,6 +156,7 @@ def test_tiled_token_positional_embedding_export(self):
                 self.dynamic_shape,
                 None,
             ),  # assuming aspect ratio is static
+            strict=True,
         )
 
         y = tpe_ep.module()(self.x, self.aspect_ratio)
@@ -172,6 +173,7 @@ def test_tiled_token_positional_embedding_aoti(self):
                 self.dynamic_shape,
                 None,
             ),  # assuming aspect ratio is static
+            strict=True,
         )
 
         with tempfile.TemporaryDirectory() as tmpdir:
@@ -195,6 +197,7 @@ def test_tiled_token_positional_embedding_et(self):
                 self.dynamic_shape,
                 None,
             ),  # assuming aspect ratio is static
+            strict=True,
         )
         et_program = to_edge(
             tpe_ep,
diff --git a/extension/pybindings/test/make_test.py b/extension/pybindings/test/make_test.py
index 6681d00add..6503b0dea1 100644
--- a/extension/pybindings/test/make_test.py
+++ b/extension/pybindings/test/make_test.py
@@ -113,7 +113,7 @@ def forward(self, *args, **kwargs):
     # variant, along with some other transformations.
     for method_name, method_input in input_map.items():
         wrapped_mod = WrapperModule(getattr(eager_module, method_name))
-        exported_methods[method_name] = export(wrapped_mod, method_input)
+        exported_methods[method_name] = export(wrapped_mod, method_input, strict=True)
 
     exec_prog = to_edge(exported_methods).to_executorch(config=et_config)
 
@@ -136,7 +136,6 @@ def make_test(  # noqa: C901
     load_fn: Callable = runtime._load_for_executorch_from_buffer
 
     def wrapper(tester: unittest.TestCase) -> None:
-
         ######### TEST CASES #########
 
         def test_e2e(tester):
@@ -154,7 +153,6 @@ def test_e2e(tester):
             tester.assertEqual(str(expected), str(executorch_output))
 
         def test_multiple_entry(tester):
-
             program, inputs = create_program(ModuleMulti())
             executorch_module = load_fn(program.buffer)
 
@@ -268,7 +266,7 @@ def test_quantized_ops(tester):
             )
             m = _convert_to_reference_decomposed_fx(m)
             config = EdgeCompileConfig(_check_ir_validity=False)
-            m = to_edge(export(m, example_inputs), compile_config=config)
+            m = to_edge(export(m, example_inputs, strict=True), compile_config=config)
             m = m.transform([QuantFusionPass(_fix_node_meta_val=True)])
 
             exec_prog = m.to_executorch()
diff --git a/extension/training/examples/XOR/export_model.py b/extension/training/examples/XOR/export_model.py
index c2cff7d428..a245361e18 100644
--- a/extension/training/examples/XOR/export_model.py
+++ b/extension/training/examples/XOR/export_model.py
@@ -24,7 +24,7 @@ def _export_model():
 
     # Captures the forward graph. The graph will look similar to the model definition now.
     # Will move to export_for_training soon which is the api planned to be supported in the long term.
-    ep = export(net, (x, torch.ones(1, dtype=torch.int64)))
+    ep = export(net, (x, torch.ones(1, dtype=torch.int64)), strict=True)
     # Captures the backward graph. The exported_program now contains the joint forward and backward graph.
     ep = _export_forward_backward(ep)
     # Lower the graph to edge dialect.
diff --git a/extension/training/pybindings/test/test.py b/extension/training/pybindings/test/test.py
index b8feb8558c..84094f6c1a 100644
--- a/extension/training/pybindings/test/test.py
+++ b/extension/training/pybindings/test/test.py
@@ -33,7 +33,7 @@ def get_random_inputs(self):
 
     def test(self):
         m = self.ModuleSimpleTrain()
-        ep = torch.export.export(m, m.get_random_inputs())
+        ep = torch.export.export(m, m.get_random_inputs(), strict=True)
         ep = _export_forward_backward(ep)
         ep = to_edge(ep)
         ep = ep.to_executorch()
diff --git a/profiler/test/test_profiler_e2e.py b/profiler/test/test_profiler_e2e.py
index f5df82176e..b38644c210 100644
--- a/profiler/test/test_profiler_e2e.py
+++ b/profiler/test/test_profiler_e2e.py
@@ -52,7 +52,9 @@ def setUpClass(cls) -> None:
         # The serialized program file. This must live longer than cls.module,
         # because the C++ pybindings will have a pointer to it. But none of the
         # tests should need to touch it.
-        cls.__buffer: bytes = to_edge(export(model, inputs)).to_executorch().buffer
+        cls.__buffer: bytes = (
+            to_edge(export(model, inputs, strict=True)).to_executorch().buffer
+        )
 
         cls.module = _load_for_executorch_from_buffer(cls.__buffer)
 
diff --git a/test/end2end/exported_module.py b/test/end2end/exported_module.py
index 12aa938c0a..81d7ff9f6c 100644
--- a/test/end2end/exported_module.py
+++ b/test/end2end/exported_module.py
@@ -190,6 +190,7 @@ def __init__(self, method):
                     if method_name_to_dynamic_shapes
                     else None
                 ),
+                strict=True,
             )
 
             exec_prog = to_edge(
diff --git a/test/models/export_delegated_program.py b/test/models/export_delegated_program.py
index a37fe32e55..a85dab6753 100644
--- a/test/models/export_delegated_program.py
+++ b/test/models/export_delegated_program.py
@@ -130,7 +130,9 @@ def __init__(self, fn):
         def forward(self, *args, **kwargs):
             return self.fn(*args, **kwargs)
 
-    exported_program = export(WrapperModule(getattr(eager_module, method)), args=inputs)
+    exported_program = export(
+        WrapperModule(getattr(eager_module, method)), args=inputs, strict=True
+    )
 
     edge_config = EdgeCompileConfig(_check_ir_validity=False)
     et_config = exir.ExecutorchBackendConfig(
@@ -167,7 +169,7 @@ def forward(self, *args, **kwargs):
     composite_module(*inputs)
 
     executorch_program = to_edge(
-        export(composite_module, args=inputs)
+        export(composite_module, args=inputs, strict=True)
     ).to_executorch(config=et_config)
 
     return executorch_program.buffer
diff --git a/test/models/generate_linear_out_bundled_program.py b/test/models/generate_linear_out_bundled_program.py
index 93fd1445ef..c98ea7ed68 100644
--- a/test/models/generate_linear_out_bundled_program.py
+++ b/test/models/generate_linear_out_bundled_program.py
@@ -37,7 +37,7 @@ def main() -> None:
     trace_inputs = (torch.ones(2, 2, dtype=torch.float),)
 
     # Trace to FX Graph.
-    exec_prog = to_edge(export(model, trace_inputs)).to_executorch(
+    exec_prog = to_edge(export(model, trace_inputs, strict=True)).to_executorch(
         config=ExecutorchBackendConfig(
             memory_planning_pass=MemoryPlanningPass(),
             to_out_var_pass=ToOutVarPass(),