From a95dce20bbcdc35db6a44f1960f8634c3889fac0 Mon Sep 17 00:00:00 2001 From: acezxn Date: Sat, 29 Nov 2025 16:33:59 -0500 Subject: [PATCH 1/4] Fixed incorrect null check logic in nonlocal extraction and improved intra_dataflow_analyzer prompt. --- .../dfbscan/intra_dataflow_analyzer.json | 43 ++++++++++--------- src/tstool/analyzer/Javascript_TS_analyzer.py | 2 +- 2 files changed, 24 insertions(+), 21 deletions(-) diff --git a/src/prompt/Javascript/dfbscan/intra_dataflow_analyzer.json b/src/prompt/Javascript/dfbscan/intra_dataflow_analyzer.json index 9b20e83..1c283cc 100644 --- a/src/prompt/Javascript/dfbscan/intra_dataflow_analyzer.json +++ b/src/prompt/Javascript/dfbscan/intra_dataflow_analyzer.json @@ -18,7 +18,7 @@ " 2. Function Invocations: Call sites where SRC is passed as an argument.", " 3. Return Statements: Points where the function returns, possibly propagating SRC.", " 4. Parameter Assignments: Assignments where SRC is assigned to a parameter or an object field that is accessible outside the function.", - " 5. Non local variable assignment: Assignments where SRC is assigned to a predefined non local variable.", + " 5. Non local variable assignment: Assignments where SRC is assigned to a predefined non local variable that is accessible outside the function.", "- Step 2: Identify all execution paths relevant to the key points found in Step 1. 
For each path:", " - Identify every potential execution path;", " - Verify whether the key points are executed along each path;", @@ -69,39 +69,39 @@ "- Type: Argument; Name: a; Function: updateValue; Index: 0; Line: 8; Dependency: SRC (p) is assigned to 'a', then passed as an argument to updateValue;", "- Type: Return; Name: a; Function: None; Index: 0; Line: 10; Dependency: SRC (p) is assigned to 'a', then returned.", "", - "Example 2: Propagation via Parameter Passing and Return", + "Example 2: Propagation via Nonlocal Variable assignment", "User:", "Now I will give you a target function with the source point 'src' at line 1:", "```javascript", - "function modifyValue(src, flag) {", - " if (flag > 0) {", - " flag = src; // source value: src", - " } else if (flag == 0) {", - " return src; // return statement", + "function update(src, count) {", + " if (count > 0) {", + " item = src;", " }", - " return -1; // Default return value ", + " return item;", "}", "```", + "", + "Consider the following as non local variables:", + "- item at line 3", + "", "Where does the source variable 'src' at line 1 propagate within this function?", "System:", "Explanation:", "Step 1: Identify SRC and its alias;", "SRC: The variable src is defined at line 1;", "Step 2: Identify key points and execution paths:", - "Path 1 (flag > 0): src is assigned to flag at line 3, making it accessible outside the function if flag is referenced after the call;", - "Path 2 (flag == 0): src is returned at line 5, propagating to the caller;", - "Path 3 (flag < 0): Function returns -1, so SRC does not propagate in this path;", + "Path 1 (count > 0): src is used to modify the value of item at line 3, which is later returned at line 5;", + "Path 2 (count <= 0): the value of item is directly returned at line 5 without using src;", "Step 3: Simulate the execution paths:", - "Path 1: When flag > 0, src is assigned to flag, allowing potential propagation outside the function through the parameter reference;", 
- "Path 2: When flag == 0, src is returned to the caller;", - "Path 3: When flag < 0, src does not propagate, as the function returns -1;", + "In Path 1, propagation is from src -> Nonlocal variable assignment at item = src -> return item;", + "In Path 2, item is returned without ever being assigned from src, so src does not propagate;", "Answer:", - "Path 1: Lines 1 -> 3;", - "- Type: Parameter; Name: flag; Function: None; Index: 1; Line: 3; Dependency: SRC (src) is assigned to parameter 'flag', which may be referenced by the caller;", + "Path 1: Lines 1 -> 3 -> 5;", + "- Type: Nonlocal; Name: item; Function: None; Index: None; Line: 3; Dependency: SRC (src) is used to update the value of item, allowing potential propagation outside the function;", + "- Type: Return; Name: item; Function: None; Index: 0; Line: 5; Dependency: the item nonlocal variable, which is updated to the value of src, is returned to the caller;", "Path 2: Lines 1 -> 5;", - "- Type: Return; Name: src; Function: None; Index: 0; Line: 5; Dependency: SRC (src) is returned to the caller;", - "Path 3: Lines 1 -> 6;", - "- No propagation; Dependency: Default return value -1 is unrelated to SRC." 
+ "- No propagation; Dependency: The value of the item nonlocal variable is directly returned to the caller;", + "" ], "question_template": "- Where does the source at line in this function propagate?", "answer_format_cot": [ @@ -114,6 +114,7 @@ " - For parameter propagation: 'Type: Parameter; Name: {parameter name}; Function: None; Index: {parameter index}; Line: {assignment line number}; Dependency: {summary of dependency from SRC to parameter}';", " - For sink propagation: 'Type: Sink; Name: {sink name}; Function: None; Index: None; Line: {sink statement line number}; Dependency: {summary of dependency from SRC to sink}';", " - For non local variable assignment: 'Type: Nonlocal; Name: {non local name}; Function: None; Index: None; Line: {assignment statement line number}; Dependency: {summary of dependency from SRC to assignment}';", + " Note: Each bullet point under a path bullet point represents one identified key point along that execution path. Each key point must be written on a single line, and do not provide any key point that does not adhere to one of the five types listed above.", "(4) If there is no propagation along a path, provide a brief explanation of why SRC does not propagate in that path as follows:", "- Path : ;", " - No propagation; Dependency: {reason for no propagation};", @@ -128,8 +129,10 @@ "Here are the Function call sites and return statements within the function, which can be used in Step 1;\n", "\n", "\n", - "", + "\n", "Now, please answer the following question:\n\n", "Your response should strictly follow the format:\n\n" ] } + + diff --git a/src/tstool/analyzer/Javascript_TS_analyzer.py b/src/tstool/analyzer/Javascript_TS_analyzer.py index d27c5f9..c24ef82 100644 --- a/src/tstool/analyzer/Javascript_TS_analyzer.py +++ b/src/tstool/analyzer/Javascript_TS_analyzer.py @@ -147,7 +147,7 @@ def extract_nonlocal_info(self) -> None: ) for candidate_node in identifiers_per_scope[child_scope_id]: - if candidate_node: + if not 
candidate_node: continue # Name mismatch From 2388b591e0951672d70a376b9216b39c2bce29ba Mon Sep 17 00:00:00 2001 From: acezxn Date: Sat, 29 Nov 2025 16:46:52 -0500 Subject: [PATCH 2/4] Added requirements.txt --- requirements.txt | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 requirements.txt diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..c8b3c1e --- /dev/null +++ b/requirements.txt @@ -0,0 +1,19 @@ +black +tree-sitter>=0.20.0,<0.22.0 +transformers +torch +tiktoken +replicate +openai +google-generativeai +tqdm +networkx +streamlit +botocore +boto3 +black +anthropic +mypy +types-networkx +types-tqdm +boto3-stubs[essential] \ No newline at end of file From 0787adf3618d6c8d58754d17332c91d74a9e3bf3 Mon Sep 17 00:00:00 2001 From: acezxn Date: Sat, 29 Nov 2025 20:55:02 -0500 Subject: [PATCH 3/4] Prevented overwriting of scope_env when parsing multiple files --- src/tstool/analyzer/Javascript_TS_analyzer.py | 27 +++++++++---------- src/tstool/analyzer/TS_analyzer.py | 2 ++ 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/src/tstool/analyzer/Javascript_TS_analyzer.py b/src/tstool/analyzer/Javascript_TS_analyzer.py index c24ef82..1a9aec5 100644 --- a/src/tstool/analyzer/Javascript_TS_analyzer.py +++ b/src/tstool/analyzer/Javascript_TS_analyzer.py @@ -22,33 +22,32 @@ def extract_scope_info(self, tree: tree_sitter.Tree) -> None: :param tree: Parsed syntax tree """ scope_stack: List[int] = [] - scope_id: int = 0 def search(root: Node) -> None: - nonlocal scope_id - for child in root.children: if child.type == "statement_block": if len(scope_stack) > 0: - self.scope_env[scope_stack[-1]][1].add(scope_id) + self.scope_env[scope_stack[-1]][1].add(self.current_scope_id) - self.scope_env[scope_id] = (child, set()) - self.scope_root_to_scope_id[child] = scope_id - scope_stack.append(scope_id) + self.scope_env[self.current_scope_id] = (child, set()) + self.scope_root_to_scope_id[child] = 
self.current_scope_id + scope_stack.append(self.current_scope_id) if child.parent: if child.parent.type == "function_declaration": - self.function_root_to_scope_id[child.parent] = scope_id + self.function_root_to_scope_id[child.parent] = ( + self.current_scope_id + ) elif ( child.parent.type == "arrow_function" or child.parent.type == "function_expression" ): if child.parent.parent: self.function_root_to_scope_id[child.parent.parent] = ( - scope_id + self.current_scope_id ) - scope_id += 1 + self.current_scope_id += 1 search(child) scope_stack.pop() else: @@ -56,10 +55,10 @@ def search(root: Node) -> None: return - self.scope_env[scope_id] = (tree.root_node, set()) - self.scope_root_to_scope_id[tree.root_node] = scope_id - scope_stack.append(scope_id) - scope_id += 1 + self.scope_env[self.current_scope_id] = (tree.root_node, set()) + self.scope_root_to_scope_id[tree.root_node] = self.current_scope_id + scope_stack.append(self.current_scope_id) + self.current_scope_id += 1 search(tree.root_node) return diff --git a/src/tstool/analyzer/TS_analyzer.py b/src/tstool/analyzer/TS_analyzer.py index 42e4f94..2f44c67 100644 --- a/src/tstool/analyzer/TS_analyzer.py +++ b/src/tstool/analyzer/TS_analyzer.py @@ -178,6 +178,8 @@ def __init__( self.scope_env: Dict[int, Tuple[Node, Set[int]]] = {} self.api_env: Dict[int, API] = {} + self.current_scope_id: int = 0 + # Dictionary storing mapping from the root node of the scope to its scope id self.scope_root_to_scope_id: Dict[Node, int] = {} From fb522962a46ca524b2f877c15faffc5fe4886d28 Mon Sep 17 00:00:00 2001 From: acezxn Date: Sun, 30 Nov 2025 15:54:07 -0500 Subject: [PATCH 4/4] Fixed type annotations --- src/llmtool/dfbscan/intra_dataflow_analyzer.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/llmtool/dfbscan/intra_dataflow_analyzer.py b/src/llmtool/dfbscan/intra_dataflow_analyzer.py index ee263ad..97d2aa5 100644 --- a/src/llmtool/dfbscan/intra_dataflow_analyzer.py +++ 
b/src/llmtool/dfbscan/intra_dataflow_analyzer.py @@ -1,7 +1,7 @@ from os import path import json import time -from typing import List, Set, Optional, Dict +from typing import List, Set, Optional, Dict, Union from llmtool.LLM_utils import * from llmtool.LLM_tool import * from memory.syntactic.function import * @@ -149,7 +149,7 @@ def _parse_response( r"Line:\s*([^;]+);" ) - current_path = None + current_path: Optional[Dict[str, Union[str, list]]] = None for line in response.splitlines(): line = line.strip().lstrip("-").strip() if not line: @@ -176,7 +176,8 @@ def _parse_response( "index": detail_match.group(4).strip(), "line": detail_match.group(5).strip(), } - current_path["propagation_details"].append(detail) + if isinstance(current_path["propagation_details"], list): + current_path["propagation_details"].append(detail) elif current_path is not None: paths.append(current_path)