LeetCode 2788: Split Strings by Separator (Linear Scan Token Builder)
LeetCode 2788StringSimulationToday we solve LeetCode 2788 - Split Strings by Separator.
Source: https://leetcode.com/problems/split-strings-by-separator/
English
Problem Summary
Given an array of strings words and a separator character separator, split each word by the separator and return all non-empty pieces in original order.
Key Insight
Use one linear scan token builder. Whenever we see the separator, flush current token if non-empty. At the end of each word, flush again. This avoids regex edge cases and naturally skips empty parts.
Algorithm
- Initialize empty answer list.
- For each word, maintain a mutable token buffer.
- If char is separator: push token if non-empty, then clear token.
- Else append char to token.
- After finishing the word, push token if non-empty.
- Return answer list.
Complexity Analysis
Let total character count be N.
Time: O(N).
Space: O(N) for output (plus token buffer).
Common Pitfalls
- Keeping empty strings caused by consecutive separators.
- Forgetting to flush the last token of each word.
- Using regex split directly in languages where special chars need escaping.
Reference Implementations (Java / Go / C++ / Python / JavaScript)
class Solution {
public List<String> splitWordsBySeparator(List<String> words, char separator) {
List<String> ans = new ArrayList<>();
for (String w : words) {
StringBuilder cur = new StringBuilder();
for (int i = 0; i < w.length(); i++) {
char ch = w.charAt(i);
if (ch == separator) {
if (cur.length() > 0) {
ans.add(cur.toString());
cur.setLength(0);
}
} else {
cur.append(ch);
}
}
if (cur.length() > 0) {
ans.add(cur.toString());
}
}
return ans;
}
}func splitWordsBySeparator(words []string, separator byte) []string {
ans := make([]string, 0)
for _, w := range words {
cur := make([]byte, 0)
for i := 0; i < len(w); i++ {
if w[i] == separator {
if len(cur) > 0 {
ans = append(ans, string(cur))
cur = cur[:0]
}
} else {
cur = append(cur, w[i])
}
}
if len(cur) > 0 {
ans = append(ans, string(cur))
}
}
return ans
}class Solution {
public:
vector<string> splitWordsBySeparator(vector<string>& words, char separator) {
vector<string> ans;
for (const string& w : words) {
string cur;
for (char ch : w) {
if (ch == separator) {
if (!cur.empty()) {
ans.push_back(cur);
cur.clear();
}
} else {
cur.push_back(ch);
}
}
if (!cur.empty()) {
ans.push_back(cur);
}
}
return ans;
}
};class Solution:
def splitWordsBySeparator(self, words: List[str], separator: str) -> List[str]:
ans = []
for w in words:
cur = []
for ch in w:
if ch == separator:
if cur:
ans.append("".join(cur))
cur = []
else:
cur.append(ch)
if cur:
ans.append("".join(cur))
return ansvar splitWordsBySeparator = function(words, separator) {
const ans = [];
for (const w of words) {
let cur = "";
for (const ch of w) {
if (ch === separator) {
if (cur.length > 0) {
ans.push(cur);
cur = "";
}
} else {
cur += ch;
}
}
if (cur.length > 0) {
ans.push(cur);
}
}
return ans;
};中文
题目概述
给定字符串数组 words 和分隔字符 separator,把每个字符串按分隔符切开,按原顺序收集所有非空子串。
核心思路
用一次线性扫描构造 token。遇到分隔符就把当前 token(若非空)加入答案并清空。每个单词结束后再 flush 一次,就能自然过滤空串。
算法步骤
- 初始化结果数组。
- 遍历每个单词,维护当前 token 缓冲区。
- 若字符是分隔符:token 非空则入结果并清空。
- 否则将字符追加到 token。
- 单词结束后再检查并追加 token。
- 返回结果。
复杂度分析
设所有字符串总长度为 N。
时间复杂度:O(N)。
空间复杂度:O(N)(主要为输出与临时 token)。
常见陷阱
- 连续分隔符产生空串却没有过滤。
- 忘记处理每个单词末尾的最后一个 token。
- 直接用正则 split,特殊字符未转义导致行为错误。
多语言参考实现(Java / Go / C++ / Python / JavaScript)
class Solution {
public List<String> splitWordsBySeparator(List<String> words, char separator) {
List<String> ans = new ArrayList<>();
for (String w : words) {
StringBuilder cur = new StringBuilder();
for (int i = 0; i < w.length(); i++) {
char ch = w.charAt(i);
if (ch == separator) {
if (cur.length() > 0) {
ans.add(cur.toString());
cur.setLength(0);
}
} else {
cur.append(ch);
}
}
if (cur.length() > 0) {
ans.add(cur.toString());
}
}
return ans;
}
}func splitWordsBySeparator(words []string, separator byte) []string {
ans := make([]string, 0)
for _, w := range words {
cur := make([]byte, 0)
for i := 0; i < len(w); i++ {
if w[i] == separator {
if len(cur) > 0 {
ans = append(ans, string(cur))
cur = cur[:0]
}
} else {
cur = append(cur, w[i])
}
}
if len(cur) > 0 {
ans = append(ans, string(cur))
}
}
return ans
}class Solution {
public:
vector<string> splitWordsBySeparator(vector<string>& words, char separator) {
vector<string> ans;
for (const string& w : words) {
string cur;
for (char ch : w) {
if (ch == separator) {
if (!cur.empty()) {
ans.push_back(cur);
cur.clear();
}
} else {
cur.push_back(ch);
}
}
if (!cur.empty()) {
ans.push_back(cur);
}
}
return ans;
}
};class Solution:
def splitWordsBySeparator(self, words: List[str], separator: str) -> List[str]:
ans = []
for w in words:
cur = []
for ch in w:
if ch == separator:
if cur:
ans.append("".join(cur))
cur = []
else:
cur.append(ch)
if cur:
ans.append("".join(cur))
return ansvar splitWordsBySeparator = function(words, separator) {
const ans = [];
for (const w of words) {
let cur = "";
for (const ch of w) {
if (ch === separator) {
if (cur.length > 0) {
ans.push(cur);
cur = "";
}
} else {
cur += ch;
}
}
if (cur.length > 0) {
ans.push(cur);
}
}
return ans;
};
Comments