|
19 | 19 | */ |
20 | 20 | package org.sonar.plugins.python.cpd; |
21 | 21 |
|
22 | | -import com.sonar.sslr.api.AstNode; |
23 | | -import com.sonar.sslr.api.GenericTokenType; |
24 | | -import com.sonar.sslr.api.Token; |
25 | | -import com.sonar.sslr.api.TokenType; |
26 | | -import java.util.List; |
| 22 | +import com.intellij.openapi.editor.Document; |
| 23 | +import com.intellij.psi.PsiElement; |
| 24 | +import com.intellij.psi.tree.IElementType; |
| 25 | +import com.jetbrains.python.PyTokenTypes; |
| 26 | +import com.jetbrains.python.lexer.PythonIndentingLexer; |
| 27 | +import com.jetbrains.python.psi.PyElementType; |
| 28 | +import com.jetbrains.python.psi.PyFile; |
| 29 | +import java.util.Arrays; |
| 30 | +import java.util.HashSet; |
| 31 | +import java.util.Set; |
| 32 | +import javax.annotation.CheckForNull; |
27 | 33 | import org.sonar.api.batch.fs.InputFile; |
28 | 34 | import org.sonar.api.batch.sensor.SensorContext; |
29 | 35 | import org.sonar.api.batch.sensor.cpd.NewCpdTokens; |
30 | | -import org.sonar.python.PythonVisitorContext; |
31 | | -import org.sonar.python.TokenLocation; |
32 | | -import org.sonar.python.api.PythonTokenType; |
| 36 | +import org.sonar.api.utils.log.Logger; |
| 37 | +import org.sonar.api.utils.log.Loggers; |
| 38 | +import org.sonar.python.frontend.PythonParser; |
| 39 | +import org.sonar.python.frontend.PythonTokenLocation; |
33 | 40 |
|
// NOTE(review): this listing is a rendered diff, not plain Java. Every row has the shape
// <old line number> | <new line number> | <marker> <code>. Rows marked - exist only in the old revision,
// rows marked + exist only in the new revision, and rows without a marker are shared by both revisions.
// PythonCpdAnalyzer pushes copy-paste-detection (CPD) tokens for one input file into the SensorContext.
34 | 41 | public class PythonCpdAnalyzer {
35 | 42 |
|
36 | 43 | private final SensorContext context;
// New revision only: the token types that are normally not pushed as CPD tokens (pushCpdTokens makes one
// exception, for a LINE_BREAK that directly follows a DEDENT), and the class logger.
| 44 | + private static final Set<PyElementType> IGNORED_TOKEN_TYPES = new HashSet<>(Arrays.asList(
| 45 | + PyTokenTypes.LINE_BREAK, PyTokenTypes.DEDENT, PyTokenTypes.INDENT, PyTokenTypes.END_OF_LINE_COMMENT, PyTokenTypes.SPACE, PyTokenTypes.STATEMENT_BREAK));
| 46 | + private static final Logger LOG = Loggers.get(PythonCpdAnalyzer.class);
37 | 47 |
|
// Shared by both revisions: stores the SensorContext used later to create the CPD token builder.
38 | 48 | public PythonCpdAnalyzer(SensorContext context) {
39 | 49 | this.context = context;
40 | 50 | }
41 | 51 |
|
// Old revision of pushCpdTokens (rows marked -): iterates over the tokens of the AST root taken from
// PythonVisitorContext, does nothing when that root is null, and adds every token whose type is not
// NEWLINE, DEDENT, INDENT or EOF, plus any NEWLINE that is immediately followed by a DEDENT.
42 | | - public void pushCpdTokens(InputFile inputFile, PythonVisitorContext visitorContext) {
43 | | - AstNode root = visitorContext.rootTree();
44 | | - if (root != null) {
45 | | - NewCpdTokens cpdTokens = context.newCpdTokens().onFile(inputFile);
46 | | - List<Token> tokens = root.getTokens();
47 | | - for (int i = 0; i < tokens.size(); i++) {
48 | | - Token token = tokens.get(i);
49 | | - TokenType currentTokenType = token.getType();
50 | | - TokenType nextTokenType = i + 1 < tokens.size() ? tokens.get(i + 1).getType() : GenericTokenType.EOF;
51 | | - // INDENT/DEDENT could not be completely ignored during CPD see https://docs.python.org/3/reference/lexical_analysis.html#indentation
52 | | - // Just taking into account DEDENT is enough, but because the DEDENT token has an empty value, it's the
53 | | - // preceding new line which is added in its place to create a difference
54 | | - if (isNewLineWithIndentationChange(currentTokenType, nextTokenType) || !isIgnoredType(currentTokenType)) {
55 | | - TokenLocation location = new TokenLocation(token);
56 | | - cpdTokens.addToken(location.startLine(), location.startLineOffset(), location.endLine(), location.endLineOffset(), token.getValue());
// New revision of pushCpdTokens (rows marked +): lexes fileContent, after PythonParser.normalizeEol, with a
// PythonIndentingLexer. The lexer start/end offsets and the Document obtained from pyFile are handed to
// PythonTokenLocation, whose line and offset values are passed to addToken.
// When no Document is available the method logs at debug level and returns before any CPD tokens are created.
| 52 | + public void pushCpdTokens(InputFile inputFile, PyFile pyFile, String fileContent) {
| 53 | + Document document = getDocument(pyFile);
| 54 | + if (document == null) {
| 55 | + LOG.debug("Cannot complete CPD analysis: PSIDocument is null.");
| 56 | + return;
| 57 | + }
| 58 | + PythonIndentingLexer lexer = new PythonIndentingLexer();
| 59 | + lexer.start(PythonParser.normalizeEol(fileContent));
| 60 | + NewCpdTokens cpdTokens = context.newCpdTokens().onFile(inputFile);
| 61 | + IElementType prevTokenType = null;
| 62 | + while (lexer.getTokenType() != null) {
| 63 | + IElementType currentTokenType = lexer.getTokenType();
| 64 | + // INDENT/DEDENT could not be completely ignored during CPD see https://docs.python.org/3/reference/lexical_analysis.html#indentation
| 65 | + // Just taking into account DEDENT is enough, but because the DEDENT token has an empty value, it's the
| 66 | + // following new line which is added in its place to create a difference
| 67 | + if (isNewLineWithIndentationChange(prevTokenType, currentTokenType) || !IGNORED_TOKEN_TYPES.contains(currentTokenType)) {
| 68 | + int tokenEnd = lexer.getTokenEnd();
| 69 | + String tokenText = lexer.getTokenText();
// A LINE_BREAK that reaches this point is recorded as a single newline character that is one character
// long, regardless of the text and end offset reported by the lexer for that token.
| 70 | + if (currentTokenType == PyTokenTypes.LINE_BREAK) {
| 71 | + tokenText = "\n";
| 72 | + tokenEnd = lexer.getTokenStart() + 1;
57 | 73 | }
| 74 | + PythonTokenLocation location = new PythonTokenLocation(lexer.getTokenStart(), tokenEnd, document);
| 75 | + cpdTokens.addToken(location.startLine(), location.startLineOffset(), location.endLine(), location.endLineOffset(), tokenText);
58 | 76 | }
59 | | - cpdTokens.save();
// New revision: prevTokenType is updated for every lexer token, including tokens that were not added above.
| 77 | + prevTokenType = currentTokenType;
| 78 | + lexer.advance();
60 | 79 | }
| 80 | +
// New revision: save() is called once, after the lexer loop has finished.
| 81 | + cpdTokens.save();
61 | 82 | }
62 | 83 |
|
// Old revision (rows marked -): true when the current token is a NEWLINE and the next token is a DEDENT.
// New revision (rows marked +): true when the current token is a LINE_BREAK and the previous token is a DEDENT.
63 | | - private static boolean isNewLineWithIndentationChange(TokenType currentTokenType, TokenType nextTokenType) {
64 | | - return currentTokenType.equals(PythonTokenType.NEWLINE) && nextTokenType.equals(PythonTokenType.DEDENT);
| 84 | + private static boolean isNewLineWithIndentationChange(@CheckForNull IElementType prevTokenType, IElementType currentTokenType) {
| 85 | + return prevTokenType != null && prevTokenType == PyTokenTypes.DEDENT && currentTokenType == PyTokenTypes.LINE_BREAK;
65 | 86 | }
66 | 87 |
|
// Old revision only (rows marked -): isIgnoredType is removed by this change; the new revision checks
// membership in the IGNORED_TOKEN_TYPES set instead.
67 | | - private static boolean isIgnoredType(TokenType type) {
68 | | - return type.equals(PythonTokenType.NEWLINE) ||
69 | | - type.equals(PythonTokenType.DEDENT) ||
70 | | - type.equals(PythonTokenType.INDENT) ||
71 | | - type.equals(GenericTokenType.EOF);
// New revision only (rows marked +): returns null when pyFile.getFirstChild() is null; otherwise returns the
// Document obtained from the view provider of the file that contains that first child.
| 88 | + @CheckForNull
| 89 | + private static Document getDocument(PyFile pyFile) {
| 90 | + PsiElement root = pyFile.getFirstChild();
| 91 | + if (root == null) {
| 92 | + return null;
| 93 | + }
| 94 | + return root.getContainingFile().getViewProvider().getDocument();
72 | 95 | }
73 | 96 |
|
74 | 97 | }
0 commit comments