From 9c0f322f215b50f672c02013a8141489cad8f7f9 Mon Sep 17 00:00:00 2001
From: andrecs <12188364+andrecsilva@users.noreply.github.com>
Date: Fri, 4 Jul 2025 11:08:08 -0300
Subject: [PATCH 1/3] Added v2 to v3 transformation that includes package
changes
---
src/codemodder/codetf/v3/codetf.py | 45 ++++++++++
tests/test_codetf.py | 127 ++++++++++++++++++++++++++++-
2 files changed, 171 insertions(+), 1 deletion(-)
diff --git a/src/codemodder/codetf/v3/codetf.py b/src/codemodder/codetf/v3/codetf.py
index 5653b977..9ca513a0 100644
--- a/src/codemodder/codetf/v3/codetf.py
+++ b/src/codemodder/codetf/v3/codetf.py
@@ -5,8 +5,11 @@
from pydantic import BaseModel, model_validator
+from codemodder.logging import logger
+
from ..common import Change, CodeTFWriter, Finding, FixQuality
from ..v2.codetf import AIMetadata as AIMetadatav2
+from ..v2.codetf import ChangeSet as v2ChangeSet
from ..v2.codetf import CodeTF as CodeTFv2
from ..v2.codetf import Result
from ..v2.codetf import Run as Runv2
@@ -148,6 +151,48 @@ def from_v2_aimetadata(ai_metadata: AIMetadatav2) -> AIMetadata:
)
+def from_v2_result_per_finding(result: Result) -> FixResult | None:
+ """
+ This transformation assumes that the v2 result will only contain a single fixedFinding for all changesets.
+ """
+ # Find the changeset with a fixedFinding
+ try:
+ changeset: v2ChangeSet = next(cs for cs in result.changeset if cs.fixedFindings)
+ except StopIteration:
+ logger.debug("No fixedFinding in the given Result")
+ return None
+
+ assert changeset.fixedFindings
+ finding = changeset.fixedFindings[0]
+
+ v3changesets = [
+ ChangeSet(
+ path=cs.path, diff=cs.diff, changes=[c.to_common() for c in cs.changes]
+ )
+ for cs in result.changeset
+ ]
+
+ generation_metadata = GenerationMetadata(
+ strategy=Strategy.ai if changeset.ai else Strategy.deterministic,
+ ai=from_v2_aimetadata(changeset.ai) if changeset.ai else None,
+ provisional=False,
+ )
+
+ fix_metadata = FixMetadata(
+ id=result.codemod,
+ summary=result.summary,
+ description=result.description,
+ generation=generation_metadata,
+ )
+
+ return FixResult(
+ finding=Finding(**finding.model_dump()),
+ fixStatus=FixStatus(status=FixStatusType.fixed),
+ changeSets=v3changesets,
+ fixMetadata=fix_metadata,
+ )
+
+
def from_v2_result(result: Result) -> list[FixResult]:
fix_results: list[FixResult] = []
# generate fixed
diff --git a/tests/test_codetf.py b/tests/test_codetf.py
index 1b97da7a..6d03ab3f 100644
--- a/tests/test_codetf.py
+++ b/tests/test_codetf.py
@@ -24,7 +24,12 @@
Strategy,
)
from codemodder.codetf.v3.codetf import Finding as FindingV3
-from codemodder.codetf.v3.codetf import FixStatusType, from_v2, from_v2_result
+from codemodder.codetf.v3.codetf import (
+ FixStatusType,
+ from_v2,
+ from_v2_result,
+ from_v2_result_per_finding,
+)
@pytest.fixture(autouse=True)
@@ -259,6 +264,126 @@ def test_v2_result_to_v3():
assert from_v2_result(result)
+def test_v2_result_to_v3_per_finding():
+ result = Result(
+ codemod="codeql:java/log-injection",
+ summary="Introduced protections against Log Inject ion / Forging attacks",
+ description='This change ensures that log messages can\'t contain newline characters, leaving you vulnerable to Log Forging / Log Injection.\n\nIf malicious users can get newline characters into a log message, they can inject and forge new log entries that look like they came from the server, and trick log analysis tools, administrators, and more . This leads to vulnerabilities like Log Injection, Log Forging, and more attacks from there.\n\nOur change simply strips out newline characters from log messages, ensuring that they can \'t be used to forge new log entries.\n```diff\n+ import io.github.pixee.security.Newlines;\n ...\n String orderId = getUserOrderId();\n- log.info("User order ID: " + orderId);\n+ log. info("User order ID: " + Newlines.stripNewlines(orderId));\n```\n',
+ detectionTool=DetectionTool(name="CodeQL"),
+ references=[
+ Reference(
+ url="https://owasp.org/www-community/attacks/Log_Inj ection",
+ description="https://owasp.org/www-community/attacks/Log_Injection",
+ ),
+ Reference(
+ url="https://knowledge-base.secureflag.com/vulnerabilities/inadequate_input_validation/log_inject ion_vulnerability.html",
+ description="https://knowledge-base.secureflag.com/vulnerabilities/inadequate_input_validation/log_injection_vulnerability.html",
+ ),
+ Reference(
+ url="https://cwe.mit re.org/data/definitions/117.html",
+ description="https://cwe.mitre.org/data/definitions/117.html",
+ ),
+ ],
+ properties={},
+ failedFiles=[],
+ changeset=[
+ ChangeSet(
+ path="app/src/main/java/org/apache/roller/planet/business/fetcher/RomeFeedFetcher.java",
+ diff='--- RomeFeedFetcher.java\n+++ RomeFeedFetcher.java\n@@ -26,6 +26,7 @@\n import com.rometools.rome.io.FeedException;\n import com.rometools.rome.io.SyndFeedInput;\n import com.rometools.rome.io.XmlReader;\n+import static io.github.pixee.security.Newlines.stripAll;\n \n import java.io.IOException;\n import java.net.URI;\n@@ -123,7 +124,7 @@\n }\n \n if(log.isDebugEnabled()) {\n- log.debug("Subscription is: " + newSub.toString());\n+ log.debug("Subscription is: " + stripAll(newSub.toString()));\n }\n \n ',
+ changes=[
+ Change(
+ lineNumber=126,
+ description="Added a call to replace any newlines the value",
+ diffSide=DiffSide.LEFT,
+ properties={},
+ packageActions=[
+ PackageAction(
+ action=Action.ADD,
+ result=PackageResult.COMPLETED,
+ package="pkg:maven/io.github.pixee/java-security-toolkit@1.2.2",
+ ),
+ PackageAction(
+ action=Action.ADD,
+ result=PackageResult.COMPLETED,
+ package="pkg:maven/io.github.pixee/java-security-toolkit@1.2.2",
+ ),
+ ],
+ fixedFindings=[
+ Finding(
+ id="915a8320-3ee8-4b0e-849b-c1b380fb83e2",
+ rule=Rule(
+ id="log-injection",
+ name="Log Injection",
+ url="https://codeql.github.com/codeql-query-help/java/java-log-injection/",
+ ),
+ )
+ ],
+ )
+ ],
+ ai=None,
+ strategy=Strategy.deterministic,
+ provisional=False,
+ fixedFindings=[
+ Finding(
+ id="915a8320-3ee8-4b0e-849b-c1b380fb83e2",
+ rule=Rule(
+ id="log-injection",
+ name="Log Injection",
+ url="https://codeql.github.com/codeql-query-help/java/java-log-injection/",
+ ),
+ )
+ ],
+ fixQuality=None,
+ ),
+ ChangeSet(
+ path="app/pom.xml",
+ diff="--- app/pom.xml\n+++ app/pom.xml\n@@ -591,9 +591,12 @@\n 5.3.0\n test\n \n+ \n+ io.github.pixee\n+ java-security-toolkit\n+ \n+ \n \n- \n-\n \n \n roller",
+ changes=[
+ Change(
+ lineNumber=594,
+ description="This library holds security tools for protecting Java API calls.\n\nLicense: MIT ✅ | [Open source](https://github.com/pixee/java-security-toolkit) ✅ | [More facts](https://mvnrepository.com/artifact/io.github.pixee/java-security-toolkit/1.2.2)\n",
+ diffSide=DiffSide.RIGHT,
+ properties={"contextual_description": "true"},
+ packageActions=[],
+ fixedFindings=[],
+ )
+ ],
+ ai=None,
+ strategy=Strategy.deterministic,
+ provisional=False,
+ fixedFindings=[],
+ fixQuality=None,
+ ),
+ ChangeSet(
+ path="pom.xml",
+ diff="--- pom.xml\n+++ pom.xml\n@@ -48,7 +48,8 @@\n UTF-8\n 6.1.5\n 1.7.36\n- \n+ 1.2.2\n+ \n \n \n app\n@@ -110,7 +111,12 @@\n 5.11.4\n test\n \n- \n+ \n+ io.github.pixee\n+ java-security-toolkit\n+ ${versions.java-security-toolkit}\n+ \n+ \n \n \n ",
+ changes=[
+ Change(
+ lineNumber=114,
+ description="This library holds security tools for protecting Java API calls.\n\nLicense: MIT ✅ | [Open source](https://github.com/pixee/java-security-toolkit) ✅ | [More facts](https://mvnrepository.com/artifact/io.github.pixee/java-security-toolkit/1.2.2)\n",
+ diffSide=DiffSide.RIGHT,
+ properties={"contextual_description": "true"},
+ packageActions=[],
+ fixedFindings=[],
+ )
+ ],
+ ai=None,
+ strategy=Strategy.deterministic,
+ provisional=False,
+ fixedFindings=[],
+ fixQuality=None,
+ ),
+ ],
+ unfixedFindings=[],
+ )
+ fix_result = from_v2_result_per_finding(result)
+ assert fix_result
+ assert len(fix_result.changeSets) == 3
+ all_paths = {cs.path for cs in fix_result.changeSets}
+ assert "app/pom.xml" in all_paths
+ assert "pom.xml" in all_paths
+
+
def test_v2_to_v3_conversion():
with open("tests/samples/codetfv2_sample.codetf", "r") as f:
codetfv2 = CodeTF.model_validate_json(f.read())
From fba9cffdb529b716bd711161c32028d68dca0df9 Mon Sep 17 00:00:00 2001
From: andrecs <12188364+andrecsilva@users.noreply.github.com>
Date: Mon, 7 Jul 2025 07:48:02 -0300
Subject: [PATCH 2/3] Changed debug message
---
src/codemodder/codetf/v3/codetf.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/codemodder/codetf/v3/codetf.py b/src/codemodder/codetf/v3/codetf.py
index 9ca513a0..719a2394 100644
--- a/src/codemodder/codetf/v3/codetf.py
+++ b/src/codemodder/codetf/v3/codetf.py
@@ -159,7 +159,7 @@ def from_v2_result_per_finding(result: Result) -> FixResult | None:
try:
changeset: v2ChangeSet = next(cs for cs in result.changeset if cs.fixedFindings)
except StopIteration:
- logger.debug("No fixedFinding in the given Result")
+ logger.debug("Either no changesets or no fixedFinding in the given Result")
return None
assert changeset.fixedFindings
From 4c5cfe67d9040733c418b7d03790c8e94ccf461c Mon Sep 17 00:00:00 2001
From: andrecs <12188364+andrecsilva@users.noreply.github.com>
Date: Mon, 7 Jul 2025 14:34:15 -0300
Subject: [PATCH 3/3] Bugfixes and added parameters for generation metadata
---
src/codemodder/codetf/v3/codetf.py | 47 +++++++++++++++++++++++-------
tests/test_codetf.py | 20 ++++++++++++-
2 files changed, 55 insertions(+), 12 deletions(-)
diff --git a/src/codemodder/codetf/v3/codetf.py b/src/codemodder/codetf/v3/codetf.py
index 719a2394..f5ec09b8 100644
--- a/src/codemodder/codetf/v3/codetf.py
+++ b/src/codemodder/codetf/v3/codetf.py
@@ -11,6 +11,7 @@
from ..v2.codetf import AIMetadata as AIMetadatav2
from ..v2.codetf import ChangeSet as v2ChangeSet
from ..v2.codetf import CodeTF as CodeTFv2
+from ..v2.codetf import Finding as v2Finding
from ..v2.codetf import Result
from ..v2.codetf import Run as Runv2
@@ -151,20 +152,35 @@ def from_v2_aimetadata(ai_metadata: AIMetadatav2) -> AIMetadata:
)
-def from_v2_result_per_finding(result: Result) -> FixResult | None:
+def from_v2_result_per_finding(
+ result: Result,
+ strategy: Strategy | None = None,
+ ai_metadata: AIMetadata | None = None,
+ provisional: bool | None = None,
+) -> FixResult | None:
"""
This transformation assumes that the v2 result will only contain a single fixedFinding for all changesets.
"""
+
+ changeset: v2ChangeSet | None = None
+ finding: v2Finding | None = None
# Find the changeset with a fixedFinding
- try:
- changeset: v2ChangeSet = next(cs for cs in result.changeset if cs.fixedFindings)
- except StopIteration:
- logger.debug("Either no changesets or no fixedFinding in the given Result")
+    for cs in result.changeset:
+        # A finding attached to the changeset itself takes precedence; otherwise
+        # fall back to the first individual change that reports one.
+        found = cs.fixedFindings or next(
+            (change.fixedFindings for change in cs.changes if change.fixedFindings),
+            None,
+        )
+        if found:
+            changeset = cs
+            finding = found[0]
+            # Stop at the first match so later changesets cannot overwrite it.
+            break
+ if changeset is None or finding is None:
+ logger.debug("Either no changesets or fixed finding in the result")
return None
- assert changeset.fixedFindings
- finding = changeset.fixedFindings[0]
-
v3changesets = [
ChangeSet(
path=cs.path, diff=cs.diff, changes=[c.to_common() for c in cs.changes]
@@ -172,10 +188,19 @@ def from_v2_result_per_finding(result: Result) -> FixResult | None:
for cs in result.changeset
]
+    # Explicit arguments win; anything left as None is derived from the changeset.
+    v2_ai = changeset.ai
+    fix_result_strategy = strategy or (Strategy.ai if v2_ai else Strategy.deterministic)
+    fix_result_ai_metadata = ai_metadata or (
+        from_v2_aimetadata(v2_ai) if v2_ai else None
+    )
+    v2_provisional = bool(changeset.provisional)
+    fix_result_provisional = v2_provisional if provisional is None else provisional
+
generation_metadata = GenerationMetadata(
- strategy=Strategy.ai if changeset.ai else Strategy.deterministic,
- ai=from_v2_aimetadata(changeset.ai) if changeset.ai else None,
- provisional=False,
+ strategy=fix_result_strategy,
+ ai=fix_result_ai_metadata,
+ provisional=fix_result_provisional,
)
fix_metadata = FixMetadata(
diff --git a/tests/test_codetf.py b/tests/test_codetf.py
index 6d03ab3f..b320c4d5 100644
--- a/tests/test_codetf.py
+++ b/tests/test_codetf.py
@@ -23,9 +23,15 @@
PackageResult,
Strategy,
)
+from codemodder.codetf.v3.codetf import (
+ AIMetadata,
+)
from codemodder.codetf.v3.codetf import Finding as FindingV3
from codemodder.codetf.v3.codetf import (
FixStatusType,
+)
+from codemodder.codetf.v3.codetf import Strategy as StrategyV3
+from codemodder.codetf.v3.codetf import (
from_v2,
from_v2_result,
from_v2_result_per_finding,
@@ -376,12 +382,24 @@ def test_v2_result_to_v3_per_finding():
],
unfixedFindings=[],
)
- fix_result = from_v2_result_per_finding(result)
+ fix_result = from_v2_result_per_finding(
+ result,
+ strategy=StrategyV3.ai,
+ provisional=True,
+ ai_metadata=AIMetadata(provider="pixee"),
+ )
assert fix_result
assert len(fix_result.changeSets) == 3
all_paths = {cs.path for cs in fix_result.changeSets}
assert "app/pom.xml" in all_paths
assert "pom.xml" in all_paths
+ assert fix_result.fixMetadata
+ # Assert that the metadata complies with the passed parameters
+ assert fix_result.fixMetadata.generation.strategy == StrategyV3.ai
+ assert fix_result.fixMetadata.generation.provisional
+ assert fix_result.fixMetadata.generation.ai
+ assert fix_result.fixMetadata.generation.ai.provider
+ assert fix_result.fixMetadata.generation.ai.provider == "pixee"
def test_v2_to_v3_conversion():