Skip to content

Commit e6ded20

Browse files
committed
#81: Implement ignoring of multiline strings and comments for //selfieonce
Key changes: - Implement rules for skipping multiline strings, block comments (`/* */`), and Javadoc-style comments (`/** */`) during `//selfieonce` removal - Add a comprehensive test suite, `RemoveSelfieOnceCommentTest`, to verify correct behavior in various scenarios - Update one test case in `InteractiveTest`
1 parent 75eaaec commit e6ded20

File tree

4 files changed

+928
-6
lines changed

4 files changed

+928
-6
lines changed
Lines changed: 213 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,213 @@
1+
/*
2+
* Copyright (C) 2025 DiffPlug
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* https://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package com.diffplug.selfie.guts
17+
18+
/**
 * Removes `//selfieonce` marker comments from source text that uses C-style comment syntax.
 *
 * The source is scanned left to right with a small lexer that recognises four constructs and skips
 * over each of them as a unit, so that comment-like text inside one construct can never be mistaken
 * for the start of another:
 * - triple-double-quoted (raw / text-block) strings,
 * - single-line literals delimited by a double quote or an apostrophe (backslash escapes honoured),
 * - block comments (including KDoc / Javadoc style),
 * - line comments.
 *
 * Only a line comment whose entire text is `selfieonce` (optionally surrounded by whitespace) is
 * removed. The line terminator that follows a removed comment is always kept.
 *
 * Known limitations: escape sequences inside triple-quoted strings are not interpreted, nested
 * block comments are not tracked, and triple-apostrophe strings are not treated specially.
 */
internal object RemoveSelfieOnceComment {
  // Regex to match a valid "//selfieonce" comment (with optional whitespace)
  private val selfieOnceRegex = "^\\s*//\\s*selfieonce\\s*\$".toRegex()

  private const val TRIPLE_QUOTE = "\"\"\""
  private const val LINE_COMMENT_START = "//"
  private const val BLOCK_COMMENT_START = "/*"
  private const val BLOCK_COMMENT_END = "*/"

  /**
   * Removes all "//selfieonce" comments from the source code. Ignores occurrences inside string
   * literals and block comments.
   *
   * @param source the source code to be processed
   * @return the source code with all "//selfieonce" comments removed. If no comments were found,
   *   the very same `source` instance is returned.
   */
  fun removeSelfieComment(source: String): String {
    val rangesForRemoval = findSelfieOnceCommentRanges(source)
    return if (rangesForRemoval.isEmpty()) source else applyRemovals(source, rangesForRemoval)
  }

  /**
   * Finds all "//selfieonce" comments in the source code and returns their removal ranges.
   *
   * @param source the source code to be processed
   * @return `(from, toExclusive)` index pairs, in ascending order and never overlapping (at most
   *   one range per line)
   */
  private fun findSelfieOnceCommentRanges(source: String): List<Pair<Int, Int>> {
    val rangesForRemoval = mutableListOf<Pair<Int, Int>>()
    var index = 0
    while (index < source.length) {
      when (source[index]) {
        '"' ->
            index =
                if (source.startsWith(TRIPLE_QUOTE, index)) skipTripleQuotedString(source, index)
                else skipQuotedLiteral(source, index)
        '\'' -> index = skipQuotedLiteral(source, index)
        '/' ->
            index =
                when {
                  source.startsWith(LINE_COMMENT_START, index) -> {
                    // A line comment owns everything up to the end of its line, so nothing
                    // inside it can open a block comment or a string.
                    val lineEnd = findLineEnd(source, index)
                    if (source.substring(index, lineEnd).matches(selfieOnceRegex)) {
                      rangesForRemoval.add(resolveRange(source, index))
                    }
                    lineEnd
                  }
                  source.startsWith(BLOCK_COMMENT_START, index) -> skipBlockComment(source, index)
                  else -> index + 1
                }
        else -> index++
      }
    }
    return rangesForRemoval
  }

  /**
   * Returns the index just past the triple-quoted string that opens at [start]. Extra double
   * quotes directly after the closing delimiter are consumed too, because in Kotlin they belong
   * to the string content. An unterminated string swallows the rest of the source.
   */
  private fun skipTripleQuotedString(source: String, start: Int): Int {
    val closeIndex = source.indexOf(TRIPLE_QUOTE, start + TRIPLE_QUOTE.length)
    if (closeIndex == -1) {
      return source.length
    }
    var end = closeIndex + TRIPLE_QUOTE.length
    while (end < source.length && source[end] == '"') {
      end++
    }
    return end
  }

  /**
   * Returns the index just past the single-line literal whose opening quote character is at
   * [start]. Such a literal cannot span lines, so an unterminated one stops at the line break
   * (which is left for the caller to process); this keeps a stray quote from hiding more than
   * the rest of its own line.
   */
  private fun skipQuotedLiteral(source: String, start: Int): Int {
    val quote = source[start]
    var index = start + 1
    while (index < source.length) {
      val ch = source[index]
      when {
        ch.isLineBreak() -> return index
        ch == quote -> return index + 1
        // Skip the escaped character, but never step over a line break.
        ch == '\\' ->
            index += if (index + 1 < source.length && !source[index + 1].isLineBreak()) 2 else 1
        else -> index++
      }
    }
    return source.length
  }

  /**
   * Returns the index just past the block comment that opens at [start]. The search for the
   * closing delimiter begins after the two opening characters, so the opener's own asterisk can
   * never double as part of the closer. An unterminated comment swallows the rest of the source.
   */
  private fun skipBlockComment(source: String, start: Int): Int {
    val closeIndex = source.indexOf(BLOCK_COMMENT_END, start + BLOCK_COMMENT_START.length)
    return if (closeIndex == -1) source.length else closeIndex + BLOCK_COMMENT_END.length
  }

  /**
   * Applies all removals to the source string in a single pass.
   *
   * @param rangesForRemoval ascending, non-overlapping `(from, toExclusive)` pairs
   */
  private fun applyRemovals(source: String, rangesForRemoval: List<Pair<Int, Int>>): String {
    val result = StringBuilder(source.length)
    var keepFrom = 0
    for ((rangeFrom, rangeTo) in rangesForRemoval) {
      result.append(source, keepFrom, rangeFrom)
      keepFrom = rangeTo
    }
    result.append(source, keepFrom, source.length)
    return result.toString()
  }

  /**
   * Resolves the removal range of the comment starting at [atIndex], preserving any source code
   * before the comment. The range starts right after the last non-whitespace character that
   * precedes the comment on its line (which is the line start when the comment stands alone) and
   * ends at the line terminator, exclusive.
   */
  private fun resolveRange(source: String, atIndex: Int): Pair<Int, Int> {
    val lineStartIndex = findLineStart(source, atIndex)
    var fromIndex = atIndex
    while (fromIndex > lineStartIndex && source[fromIndex - 1].isWhitespace()) {
      fromIndex--
    }
    return Pair(fromIndex, findLineEnd(source, atIndex))
  }

  /** Finds the start index of the line containing the given index. */
  private fun findLineStart(source: String, fromIndex: Int): Int {
    var lineStartIndex = fromIndex
    while (lineStartIndex > 0 && !source[lineStartIndex - 1].isLineBreak()) {
      lineStartIndex--
    }
    return lineStartIndex
  }

  /**
   * Finds the end index of the line containing the given index: the index of its first line-break
   * character, or `source.length` when the line is the last one and has no terminator.
   */
  private fun findLineEnd(source: String, fromIndex: Int): Int {
    var toIndex = fromIndex
    while (toIndex < source.length && !source[toIndex].isLineBreak()) {
      toIndex++
    }
    return toIndex
  }

  /** True for the two characters this scanner treats as line terminators. */
  private fun Char.isLineBreak(): Boolean = this == '\n' || this == '\r'
}

jvm/selfie-lib/src/commonMain/kotlin/com/diffplug/selfie/guts/SourceFile.kt

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2023-2024 DiffPlug
2+
* Copyright (C) 2023-2025 DiffPlug
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -88,9 +88,7 @@ class SourceFile(filename: String, content: String, val language: Language) {
8888
}
8989
}
9090
/**
 * Replaces `contentSlice` with a new `Slice` built from the current content after it has been
 * passed through `RemoveSelfieOnceComment.removeSelfieComment`. In this diff hunk the lines
 * marked `-` are the previous implementation (two plain `String.replace` calls) and the line
 * marked `+` is its replacement.
 */
fun removeSelfieOnceComments() {
91-
// TODO: there is a bug here due to string constants, and non-C file comments
92-
contentSlice =
93-
Slice(contentSlice.toString().replace("//selfieonce", "").replace("// selfieonce", ""))
91+
contentSlice = Slice(RemoveSelfieOnceComment.removeSelfieComment(contentSlice.toString()))
9492
}
9593
private fun findOnLine(toFind: String, lineOneIndexed: Int): Slice {
9694
val lineContent = contentSlice.unixLine(lineOneIndexed)

0 commit comments

Comments
 (0)