Skip to content

Commit c80a9f5

Browse files
marc-jasper-sonarsourcesonartech
authored and committed
SONARPY-3784 Add miss-classified telemetry keys based on path-based heuristic (#856)
GitOrigin-RevId: 789b42f8c6a190a19c6fbd511197cc44f7298e8c
1 parent ec66f39 commit c80a9f5

7 files changed

Lines changed: 312 additions & 65 deletions

File tree

python-commons/src/main/java/org/sonar/plugins/python/PythonScanner.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -147,7 +147,7 @@ protected void scanFile(PythonInputFile inputFile) throws IOException {
147147
newSymbolsCollector.collect(context.newSymbolTable().onFile(inputFile.wrappedFile()), visitorContext.rootTree());
148148
pythonHighlighter.highlight(context, visitorContext, inputFile);
149149
typeInferenceTelemetryCollector.collect(visitorContext.rootTree());
150-
testFileTelemetryCollector.collect(visitorContext.rootTree(), fileType);
150+
testFileTelemetryCollector.collect(visitorContext.rootTree(), fileType, inputFile.wrappedFile().uri().getPath());
151151
}
152152

153153
searchForDataBricks(visitorContext);

python-commons/src/main/java/org/sonar/plugins/python/PythonSensor.java

Lines changed: 12 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -200,12 +200,22 @@ private void updateTypeInferenceTelemetry(PythonScanner scanner) {
200200

201201
private void updateTestFileTelemetry(PythonScanner scanner) {
202202
TestFileTelemetry telemetry = scanner.getTestFileTelemetry();
203+
203204
sensorTelemetryStorage.updateMetric(TelemetryMetricKey.PYTHON_MAIN_FILES_TOTAL, telemetry.totalMainFiles());
204-
sensorTelemetryStorage.updateMetric(TelemetryMetricKey.PYTHON_MAIN_FILES_MISCLASSIFIED_TEST, telemetry.misclassifiedTestFiles());
205205
sensorTelemetryStorage.updateMetric(TelemetryMetricKey.PYTHON_LINES_TOTAL, telemetry.totalLines());
206206
sensorTelemetryStorage.updateMetric(TelemetryMetricKey.PYTHON_MAIN_LINES, telemetry.totalMainLines());
207207
sensorTelemetryStorage.updateMetric(TelemetryMetricKey.PYTHON_TEST_LINES, telemetry.testLines());
208-
sensorTelemetryStorage.updateMetric(TelemetryMetricKey.PYTHON_MAIN_LINES_MISCLASSIFIED_TEST, telemetry.misclassifiedTestLines());
208+
209+
sensorTelemetryStorage.updateMetric(TelemetryMetricKey.PYTHON_MAIN_FILES_MISCLASSIFIED_IMPORT_BASED, telemetry.importBasedMisclassifiedTestFiles());
210+
sensorTelemetryStorage.updateMetric(TelemetryMetricKey.PYTHON_MAIN_LINES_MISCLASSIFIED_IMPORT_BASED, telemetry.importBasedMisclassifiedTestLines());
211+
212+
sensorTelemetryStorage.updateMetric(TelemetryMetricKey.PYTHON_MAIN_FILES_MISCLASSIFIED_PATH_BASED, telemetry.pathBasedMisclassifiedTestFiles());
213+
sensorTelemetryStorage.updateMetric(TelemetryMetricKey.PYTHON_MAIN_LINES_MISCLASSIFIED_PATH_BASED, telemetry.pathBasedMisclassifiedTestLines());
214+
215+
sensorTelemetryStorage.updateMetric(TelemetryMetricKey.PYTHON_MAIN_FILES_MISCLASSIFIED_IMPORT_BASED_ONLY, telemetry.filesInImportBasedOnly());
216+
sensorTelemetryStorage.updateMetric(TelemetryMetricKey.PYTHON_MAIN_LINES_MISCLASSIFIED_IMPORT_BASED_ONLY, telemetry.linesInImportBasedOnly());
217+
sensorTelemetryStorage.updateMetric(TelemetryMetricKey.PYTHON_MAIN_FILES_MISCLASSIFIED_PATH_BASED_ONLY, telemetry.filesInPathBasedOnly());
218+
sensorTelemetryStorage.updateMetric(TelemetryMetricKey.PYTHON_MAIN_LINES_MISCLASSIFIED_PATH_BASED_ONLY, telemetry.linesInPathBasedOnly());
209219
}
210220

211221
private void updateNamespacePackageTelemetry(PythonIndexer pythonIndexer) {

python-commons/src/main/java/org/sonar/plugins/python/telemetry/TelemetryMetricKey.java

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -49,11 +49,17 @@ public enum TelemetryMetricKey {
4949
PYTHON_TYPES_SYMBOLS_UNIQUE("python.types.symbols.unique"),
5050
PYTHON_TYPES_SYMBOLS_UNKNOWN("python.types.symbols.unknown"),
5151
PYTHON_MAIN_FILES_TOTAL("python.files.main.total"),
52-
PYTHON_MAIN_FILES_MISCLASSIFIED_TEST("python.files.main.misclassified_test"),
5352
PYTHON_LINES_TOTAL("python.lines.total"),
5453
PYTHON_MAIN_LINES("python.lines.main"),
5554
PYTHON_TEST_LINES("python.lines.test"),
56-
PYTHON_MAIN_LINES_MISCLASSIFIED_TEST("python.lines.main.misclassified_test");
55+
PYTHON_MAIN_FILES_MISCLASSIFIED_IMPORT_BASED("python.files.main.misclassified_import_based"),
56+
PYTHON_MAIN_LINES_MISCLASSIFIED_IMPORT_BASED("python.lines.main.misclassified_import_based"),
57+
PYTHON_MAIN_FILES_MISCLASSIFIED_PATH_BASED("python.files.main.misclassified_path_based"),
58+
PYTHON_MAIN_LINES_MISCLASSIFIED_PATH_BASED("python.lines.main.misclassified_path_based"),
59+
PYTHON_MAIN_FILES_MISCLASSIFIED_IMPORT_BASED_ONLY("python.files.main.misclassified_import_based_only"),
60+
PYTHON_MAIN_LINES_MISCLASSIFIED_IMPORT_BASED_ONLY("python.lines.main.misclassified_import_based_only"),
61+
PYTHON_MAIN_FILES_MISCLASSIFIED_PATH_BASED_ONLY("python.files.main.misclassified_path_based_only"),
62+
PYTHON_MAIN_LINES_MISCLASSIFIED_PATH_BASED_ONLY("python.lines.main.misclassified_path_based_only");
5763

5864
private final String key;
5965

python-commons/src/main/java/org/sonar/plugins/python/telemetry/collectors/TestFileTelemetry.java

Lines changed: 25 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -20,32 +20,50 @@
2020
* Telemetry data for tracking test file misclassification.
2121
*
2222
* @param totalMainFiles Total number of files classified as MAIN
23-
* @param misclassifiedTestFiles Number of MAIN files that appear to be test files (import unittest/pytest or follow pytest patterns)
23+
* @param importBasedMisclassifiedTestFiles Number of MAIN files that appear to be test files based on import heuristic (import unittest/pytest or follow pytest patterns)
2424
* @param totalLines Total number of lines across all files (MAIN + TEST)
2525
* @param totalMainLines Total number of lines across all MAIN files
2626
* @param testLines Number of lines across all TEST files (as classified by the scanner engine)
27-
* @param misclassifiedTestLines Number of lines across misclassified test files (subset of totalMainLines)
27+
* @param importBasedMisclassifiedTestLines Number of lines across import-based misclassified test files (subset of totalMainLines)
28+
* @param pathBasedMisclassifiedTestFiles Number of MAIN files that appear to be test files based on path heuristic (a directory component of the path, excluding the filename, equals "test" or "tests", case-insensitively)
29+
* @param pathBasedMisclassifiedTestLines Number of lines across path-based misclassified test files
30+
* @param filesInImportBasedOnly Number of files detected by import-based heuristic but not by path-based heuristic
31+
* @param filesInPathBasedOnly Number of files detected by path-based heuristic but not by import-based heuristic
32+
* @param linesInImportBasedOnly Number of lines in files detected by import-based heuristic but not by path-based heuristic
33+
* @param linesInPathBasedOnly Number of lines in files detected by path-based heuristic but not by import-based heuristic
2834
*/
2935
public record TestFileTelemetry(
3036
long totalMainFiles,
31-
long misclassifiedTestFiles,
37+
long importBasedMisclassifiedTestFiles,
3238
long totalLines,
3339
long totalMainLines,
3440
long testLines,
35-
long misclassifiedTestLines) {
41+
long importBasedMisclassifiedTestLines,
42+
long pathBasedMisclassifiedTestFiles,
43+
long pathBasedMisclassifiedTestLines,
44+
long filesInImportBasedOnly,
45+
long filesInPathBasedOnly,
46+
long linesInImportBasedOnly,
47+
long linesInPathBasedOnly) {
3648

3749
public static TestFileTelemetry empty() {
38-
return new TestFileTelemetry(0, 0, 0, 0, 0, 0);
50+
return new TestFileTelemetry(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3951
}
4052

4153
public TestFileTelemetry add(TestFileTelemetry other) {
4254
return new TestFileTelemetry(
4355
this.totalMainFiles + other.totalMainFiles,
44-
this.misclassifiedTestFiles + other.misclassifiedTestFiles,
56+
this.importBasedMisclassifiedTestFiles + other.importBasedMisclassifiedTestFiles,
4557
this.totalLines + other.totalLines,
4658
this.totalMainLines + other.totalMainLines,
4759
this.testLines + other.testLines,
48-
this.misclassifiedTestLines + other.misclassifiedTestLines
60+
this.importBasedMisclassifiedTestLines + other.importBasedMisclassifiedTestLines,
61+
this.pathBasedMisclassifiedTestFiles + other.pathBasedMisclassifiedTestFiles,
62+
this.pathBasedMisclassifiedTestLines + other.pathBasedMisclassifiedTestLines,
63+
this.filesInImportBasedOnly + other.filesInImportBasedOnly,
64+
this.filesInPathBasedOnly + other.filesInPathBasedOnly,
65+
this.linesInImportBasedOnly + other.linesInImportBasedOnly,
66+
this.linesInPathBasedOnly + other.linesInPathBasedOnly
4967
);
5068
}
5169
}

python-commons/src/main/java/org/sonar/plugins/python/telemetry/collectors/TestFileTelemetryCollector.java

Lines changed: 58 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@
1616
*/
1717
package org.sonar.plugins.python.telemetry.collectors;
1818

19+
import java.util.Locale;
1920
import java.util.Set;
2021
import java.util.concurrent.atomic.AtomicLong;
2122
import org.sonar.api.batch.fs.InputFile;
@@ -31,13 +32,19 @@ public class TestFileTelemetryCollector {
3132
private static final Set<String> TEST_FRAMEWORK_MODULES = Set.of("unittest", "pytest");
3233

3334
private final AtomicLong totalMainFiles = new AtomicLong(0);
34-
private final AtomicLong misclassifiedTestFiles = new AtomicLong(0);
35+
private final AtomicLong importBasedMisclassifiedTestFiles = new AtomicLong(0);
3536
private final AtomicLong totalLines = new AtomicLong(0);
3637
private final AtomicLong totalMainLines = new AtomicLong(0);
3738
private final AtomicLong testLines = new AtomicLong(0);
38-
private final AtomicLong misclassifiedTestLines = new AtomicLong(0);
39-
40-
public void collect(FileInput rootTree, InputFile.Type fileType) {
39+
private final AtomicLong importBasedMisclassifiedTestLines = new AtomicLong(0);
40+
private final AtomicLong pathBasedMisclassifiedTestFiles = new AtomicLong(0);
41+
private final AtomicLong pathBasedMisclassifiedTestLines = new AtomicLong(0);
42+
private final AtomicLong filesInImportBasedOnly = new AtomicLong(0);
43+
private final AtomicLong filesInPathBasedOnly = new AtomicLong(0);
44+
private final AtomicLong linesInImportBasedOnly = new AtomicLong(0);
45+
private final AtomicLong linesInPathBasedOnly = new AtomicLong(0);
46+
47+
public void collect(FileInput rootTree, InputFile.Type fileType, String filePath) {
4148
long lines = lineCount(rootTree);
4249
totalLines.addAndGet(lines);
4350

@@ -49,13 +56,30 @@ public void collect(FileInput rootTree, InputFile.Type fileType) {
4956
totalMainFiles.incrementAndGet();
5057
totalMainLines.addAndGet(lines);
5158

52-
if (isMisclassifiedTestFile(rootTree)) {
53-
misclassifiedTestFiles.incrementAndGet();
54-
misclassifiedTestLines.addAndGet(lines);
59+
boolean isImportBasedMisclassified = isImportBasedMisclassifiedTestFile(rootTree);
60+
boolean isPathBasedMisclassified = isPathBasedMisclassifiedTestFile(filePath);
61+
62+
if (isImportBasedMisclassified) {
63+
importBasedMisclassifiedTestFiles.incrementAndGet();
64+
importBasedMisclassifiedTestLines.addAndGet(lines);
65+
}
66+
67+
if (isPathBasedMisclassified) {
68+
pathBasedMisclassifiedTestFiles.incrementAndGet();
69+
pathBasedMisclassifiedTestLines.addAndGet(lines);
70+
}
71+
72+
// Track differences between heuristics
73+
if (isImportBasedMisclassified && !isPathBasedMisclassified) {
74+
filesInImportBasedOnly.incrementAndGet();
75+
linesInImportBasedOnly.addAndGet(lines);
76+
} else if (isPathBasedMisclassified && !isImportBasedMisclassified) {
77+
filesInPathBasedOnly.incrementAndGet();
78+
linesInPathBasedOnly.addAndGet(lines);
5579
}
5680
}
5781

58-
private static boolean isMisclassifiedTestFile(FileInput rootTree) {
82+
private static boolean isImportBasedMisclassifiedTestFile(FileInput rootTree) {
5983
var importVisitor = new TestImportVisitor();
6084
rootTree.accept(importVisitor);
6185
if (importVisitor.hasTestFrameworkImport) {
@@ -67,18 +91,42 @@ private static boolean isMisclassifiedTestFile(FileInput rootTree) {
6791
return pytestPatternVisitor.hasPytestPattern;
6892
}
6993

94+
static boolean isPathBasedMisclassifiedTestFile(String filePath) {
95+
if (filePath.isEmpty()) {
96+
return false;
97+
}
98+
99+
String normalizedPath = filePath.replace('\\', '/');
100+
String[] pathComponents = normalizedPath.split("/");
101+
// The filename itself is not checked
102+
for (int i = 0; i < pathComponents.length - 1; i++) {
103+
String component = pathComponents[i].toLowerCase(Locale.ROOT);
104+
if ("test".equals(component) || "tests".equals(component)) {
105+
return true;
106+
}
107+
}
108+
109+
return false;
110+
}
111+
70112
static long lineCount(FileInput rootTree) {
71113
return rootTree.lastToken().line();
72114
}
73115

74116
public TestFileTelemetry getTelemetry() {
75117
return new TestFileTelemetry(
76118
totalMainFiles.get(),
77-
misclassifiedTestFiles.get(),
119+
importBasedMisclassifiedTestFiles.get(),
78120
totalLines.get(),
79121
totalMainLines.get(),
80122
testLines.get(),
81-
misclassifiedTestLines.get());
123+
importBasedMisclassifiedTestLines.get(),
124+
pathBasedMisclassifiedTestFiles.get(),
125+
pathBasedMisclassifiedTestLines.get(),
126+
filesInImportBasedOnly.get(),
127+
filesInPathBasedOnly.get(),
128+
linesInImportBasedOnly.get(),
129+
linesInPathBasedOnly.get());
82130
}
83131

84132
private static class TestImportVisitor extends BaseTreeVisitor {

0 commit comments

Comments
 (0)