LeetCode 3455: Shortest Matching Substring (KMP Occurrences + Suffix Next Index)
LeetCode 3455KMPString MatchingToday we solve LeetCode 3455 - Shortest Matching Substring.
Source: https://leetcode.com/problems/shortest-matching-substring/
English
Problem Summary
Pattern p contains exactly two *. Split it as A*B*C. Each * matches any string (including empty). We need the shortest substring of s matching this full pattern.
Key Idea
A valid match is: pick one occurrence of A, then an occurrence of B starting after A ends, then an occurrence of C starting after B ends. Minimize end(C) - start(A).
So we need fast “next occurrence start at or after position x” queries for B and C.
Algorithm
1) Parse p into A, B, C.
2) Use KMP to collect all start positions where each block appears in s.
3) Build arrays nextB[i], nextC[i] = earliest start position in occurrence list that is ≥ i.
4) Enumerate each possible start of A (or every index if A empty), greedily jump to the earliest feasible B, then earliest feasible C.
5) Take minimum length.
Complexity
Let n = s.length. KMP for each block is linear, and building next arrays is linear.
Total: O(n + |p|) time, O(n) space.
Reference Implementations (Java / Go / C++ / Python / JavaScript)
import java.util.*;
class Solution {
public int shortestMatchingSubstring(String s, String p) {
int i1 = p.indexOf('*'), i2 = p.indexOf('*', i1 + 1);
String A = p.substring(0, i1), B = p.substring(i1 + 1, i2), C = p.substring(i2 + 1);
int n = s.length(), INF = 1_000_000_000, ans = INF;
List occA = A.isEmpty() ? new ArrayList<>() : kmp(s, A);
List occB = B.isEmpty() ? new ArrayList<>() : kmp(s, B);
List occC = C.isEmpty() ? new ArrayList<>() : kmp(s, C);
int[] nextB = buildNext(occB, n), nextC = buildNext(occC, n);
if (A.isEmpty()) {
for (int st = 0; st <= n; st++) ans = Math.min(ans, eval(st, st, B.length(), C.length(), n, nextB, nextC));
} else {
for (int st : occA) ans = Math.min(ans, eval(st, st + A.length(), B.length(), C.length(), n, nextB, nextC));
}
return ans >= INF ? -1 : ans;
}
private int eval(int st, int pos, int lenB, int lenC, int n, int[] nextB, int[] nextC) {
int bStart = lenB == 0 ? pos : (pos <= n ? nextB[pos] : -1);
if (bStart < 0) return 1_000_000_000;
int afterB = bStart + lenB;
int cStart = lenC == 0 ? afterB : (afterB <= n ? nextC[afterB] : -1);
if (cStart < 0) return 1_000_000_000;
return cStart + lenC - st;
}
private int[] buildNext(List occ, int n) {
int[] nxt = new int[n + 1];
Arrays.fill(nxt, -1);
int j = occ.size() - 1, cur = -1;
for (int i = n; i >= 0; i--) {
while (j >= 0 && occ.get(j) >= i) cur = occ.get(j--);
nxt[i] = cur;
}
return nxt;
}
private List kmp(String s, String p) {
int n = s.length(), m = p.length();
int[] lps = new int[m];
for (int i = 1, j = 0; i < m; i++) {
while (j > 0 && p.charAt(i) != p.charAt(j)) j = lps[j - 1];
if (p.charAt(i) == p.charAt(j)) j++;
lps[i] = j;
}
List res = new ArrayList<>();
for (int i = 0, j = 0; i < n; i++) {
while (j > 0 && s.charAt(i) != p.charAt(j)) j = lps[j - 1];
if (s.charAt(i) == p.charAt(j)) j++;
if (j == m) { res.add(i - m + 1); j = lps[j - 1]; }
}
return res;
}
} func shortestMatchingSubstring(s string, p string) int {
i1 := -1
for i, ch := range p { if ch == '*' { i1 = i; break } }
i2 := -1
for i := i1 + 1; i < len(p); i++ { if p[i] == '*' { i2 = i; break } }
A, B, C := p[:i1], p[i1+1:i2], p[i2+1:]
n := len(s)
const INF = int(1e9)
ans := INF
occA := []int{}
if len(A) > 0 { occA = kmp(s, A) }
occB := []int{}
if len(B) > 0 { occB = kmp(s, B) }
occC := []int{}
if len(C) > 0 { occC = kmp(s, C) }
nextB := buildNext(occB, n)
nextC := buildNext(occC, n)
eval := func(st, pos int) int {
bStart := pos
if len(B) > 0 {
if pos > n || nextB[pos] < 0 { return INF }
bStart = nextB[pos]
}
afterB := bStart + len(B)
cStart := afterB
if len(C) > 0 {
if afterB > n || nextC[afterB] < 0 { return INF }
cStart = nextC[afterB]
}
return cStart + len(C) - st
}
if len(A) == 0 { for st := 0; st <= n; st++ { if v := eval(st, st); v < ans { ans = v } } } else {
for _, st := range occA { if v := eval(st, st+len(A)); v < ans { ans = v } }
}
if ans >= INF { return -1 }
return ans
}
func buildNext(occ []int, n int) []int {
nxt := make([]int, n+1)
for i := range nxt { nxt[i] = -1 }
j, cur := len(occ)-1, -1
for i := n; i >= 0; i-- {
for j >= 0 && occ[j] >= i { cur = occ[j]; j-- }
nxt[i] = cur
}
return nxt
}
func kmp(s, p string) []int {
m := len(p); lps := make([]int, m)
for i, j := 1, 0; i < m; i++ { for j > 0 && p[i] != p[j] { j = lps[j-1] }; if p[i] == p[j] { j++ }; lps[i] = j }
res := []int{}
for i, j := 0, 0; i < len(s); i++ { for j > 0 && s[i] != p[j] { j = lps[j-1] }; if s[i] == p[j] { j++ }; if j == m { res = append(res, i-m+1); j = lps[j-1] } }
return res
}class Solution {
vector kmp(const string& s, const string& p){
int n=s.size(), m=p.size(); vector lps(m,0), res;
for(int i=1,j=0;i buildNext(const vector& occ,int n){
vector nxt(n+1,-1); int j=(int)occ.size()-1,cur=-1;
for(int i=n;i>=0;i--){ while(j>=0&&occ[j]>=i){ cur=occ[j--]; } nxt[i]=cur; }
return nxt;
}
public:
int shortestMatchingSubstring(string s, string p) {
int i1=p.find('*'), i2=p.find('*',i1+1); string A=p.substr(0,i1), B=p.substr(i1+1,i2-i1-1), C=p.substr(i2+1);
int n=s.size(), INF=1e9, ans=INF;
vector occA=A.empty()?vector{}:kmp(s,A), occB=B.empty()?vector{}:kmp(s,B), occC=C.empty()?vector{}:kmp(s,C);
auto nextB=buildNext(occB,n), nextC=buildNext(occC,n);
auto eval=[&](int st,int pos){
int bStart=pos; if(!B.empty()){ if(pos>n||nextB[pos]<0) return INF; bStart=nextB[pos]; }
int afterB=bStart+(int)B.size(); int cStart=afterB; if(!C.empty()){ if(afterB>n||nextC[afterB]<0) return INF; cStart=nextC[afterB]; }
return cStart+(int)C.size()-st;
};
if(A.empty()) for(int st=0;st<=n;st++) ans=min(ans,eval(st,st)); else for(int st:occA) ans=min(ans,eval(st,st+(int)A.size()));
return ans>=INF?-1:ans;
}
}; class Solution:
def shortestMatchingSubstring(self, s: str, p: str) -> int:
i1 = p.find('*')
i2 = p.find('*', i1 + 1)
A, B, C = p[:i1], p[i1 + 1:i2], p[i2 + 1:]
n = len(s)
def kmp(text: str, pat: str):
m = len(pat)
lps = [0] * m
j = 0
for i in range(1, m):
while j and pat[i] != pat[j]:
j = lps[j - 1]
if pat[i] == pat[j]:
j += 1
lps[i] = j
res, j = [], 0
for i, ch in enumerate(text):
while j and ch != pat[j]:
j = lps[j - 1]
if ch == pat[j]:
j += 1
if j == m:
res.append(i - m + 1)
j = lps[j - 1]
return res
def build_next(occ):
nxt = [-1] * (n + 1)
j, cur = len(occ) - 1, -1
for i in range(n, -1, -1):
while j >= 0 and occ[j] >= i:
cur = occ[j]
j -= 1
nxt[i] = cur
return nxt
occA = kmp(s, A) if A else []
occB = kmp(s, B) if B else []
occC = kmp(s, C) if C else []
nextB, nextC = build_next(occB), build_next(occC)
INF = 10**9
def eval_one(st, pos):
b_start = pos
if B:
if pos > n or nextB[pos] < 0:
return INF
b_start = nextB[pos]
after_b = b_start + len(B)
c_start = after_b
if C:
if after_b > n or nextC[after_b] < 0:
return INF
c_start = nextC[after_b]
return c_start + len(C) - st
ans = INF
if A:
for st in occA:
ans = min(ans, eval_one(st, st + len(A)))
else:
for st in range(n + 1):
ans = min(ans, eval_one(st, st))
return -1 if ans >= INF else ansvar shortestMatchingSubstring = function(s, p) {
const i1 = p.indexOf('*');
const i2 = p.indexOf('*', i1 + 1);
const A = p.slice(0, i1), B = p.slice(i1 + 1, i2), C = p.slice(i2 + 1);
const n = s.length, INF = 1e9;
const kmp = (text, pat) => {
const m = pat.length, lps = Array(m).fill(0), res = [];
for (let i = 1, j = 0; i < m; i++) {
while (j > 0 && pat[i] !== pat[j]) j = lps[j - 1];
if (pat[i] === pat[j]) j++;
lps[i] = j;
}
for (let i = 0, j = 0; i < text.length; i++) {
while (j > 0 && text[i] !== pat[j]) j = lps[j - 1];
if (text[i] === pat[j]) j++;
if (j === m) { res.push(i - m + 1); j = lps[j - 1]; }
}
return res;
};
const buildNext = (occ) => {
const nxt = Array(n + 1).fill(-1);
let j = occ.length - 1, cur = -1;
for (let i = n; i >= 0; i--) {
while (j >= 0 && occ[j] >= i) cur = occ[j--];
nxt[i] = cur;
}
return nxt;
};
const occA = A.length ? kmp(s, A) : [];
const occB = B.length ? kmp(s, B) : [];
const occC = C.length ? kmp(s, C) : [];
const nextB = buildNext(occB), nextC = buildNext(occC);
const evalOne = (st, pos) => {
let bStart = pos;
if (B.length) { if (pos > n || nextB[pos] < 0) return INF; bStart = nextB[pos]; }
const afterB = bStart + B.length;
let cStart = afterB;
if (C.length) { if (afterB > n || nextC[afterB] < 0) return INF; cStart = nextC[afterB]; }
return cStart + C.length - st;
};
let ans = INF;
if (A.length) {
for (const st of occA) ans = Math.min(ans, evalOne(st, st + A.length));
} else {
for (let st = 0; st <= n; st++) ans = Math.min(ans, evalOne(st, st));
}
return ans >= INF ? -1 : ans;
};中文
题目概述
模式串 p 恰好有两个 *,可写成 A*B*C。* 可匹配任意长度字符串(含空串)。要找出 s 中最短的一个子串,使其能匹配整个模式。
核心思路
合法匹配必须满足顺序:先匹配 A,再匹配 B,再匹配 C,且后一个起点不早于前一个结束位置。
所以问题变成:固定 A 的起点后,如何快速找到“最早可用的 B 起点”和“最早可用的 C 起点”。
算法步骤
1)把 p 拆成 A/B/C。
2)分别用 KMP 找出 A/B/C 在 s 中的所有出现起点。
3)构建 nextB[i]、nextC[i]:表示从位置 i 往后第一个可用出现起点。
4)枚举每个 A 起点(若 A 为空就枚举所有起点),贪心接上最早可行的 B 和 C。
5)更新最短长度。
复杂度分析
总时间复杂度 O(n + |p|),空间复杂度 O(n)。
多语言参考实现(Java / Go / C++ / Python / JavaScript)
import java.util.*;
class Solution {
public int shortestMatchingSubstring(String s, String p) {
int i1 = p.indexOf('*'), i2 = p.indexOf('*', i1 + 1);
String A = p.substring(0, i1), B = p.substring(i1 + 1, i2), C = p.substring(i2 + 1);
int n = s.length(), INF = 1_000_000_000, ans = INF;
List occA = A.isEmpty() ? new ArrayList<>() : kmp(s, A);
List occB = B.isEmpty() ? new ArrayList<>() : kmp(s, B);
List occC = C.isEmpty() ? new ArrayList<>() : kmp(s, C);
int[] nextB = buildNext(occB, n), nextC = buildNext(occC, n);
if (A.isEmpty()) {
for (int st = 0; st <= n; st++) ans = Math.min(ans, eval(st, st, B.length(), C.length(), n, nextB, nextC));
} else {
for (int st : occA) ans = Math.min(ans, eval(st, st + A.length(), B.length(), C.length(), n, nextB, nextC));
}
return ans >= INF ? -1 : ans;
}
private int eval(int st, int pos, int lenB, int lenC, int n, int[] nextB, int[] nextC) {
int bStart = lenB == 0 ? pos : (pos <= n ? nextB[pos] : -1);
if (bStart < 0) return 1_000_000_000;
int afterB = bStart + lenB;
int cStart = lenC == 0 ? afterB : (afterB <= n ? nextC[afterB] : -1);
if (cStart < 0) return 1_000_000_000;
return cStart + lenC - st;
}
private int[] buildNext(List occ, int n) {
int[] nxt = new int[n + 1];
Arrays.fill(nxt, -1);
int j = occ.size() - 1, cur = -1;
for (int i = n; i >= 0; i--) {
while (j >= 0 && occ.get(j) >= i) cur = occ.get(j--);
nxt[i] = cur;
}
return nxt;
}
private List kmp(String s, String p) {
int n = s.length(), m = p.length();
int[] lps = new int[m];
for (int i = 1, j = 0; i < m; i++) {
while (j > 0 && p.charAt(i) != p.charAt(j)) j = lps[j - 1];
if (p.charAt(i) == p.charAt(j)) j++;
lps[i] = j;
}
List res = new ArrayList<>();
for (int i = 0, j = 0; i < n; i++) {
while (j > 0 && s.charAt(i) != p.charAt(j)) j = lps[j - 1];
if (s.charAt(i) == p.charAt(j)) j++;
if (j == m) { res.add(i - m + 1); j = lps[j - 1]; }
}
return res;
}
} func shortestMatchingSubstring(s string, p string) int {
i1 := -1
for i, ch := range p { if ch == '*' { i1 = i; break } }
i2 := -1
for i := i1 + 1; i < len(p); i++ { if p[i] == '*' { i2 = i; break } }
A, B, C := p[:i1], p[i1+1:i2], p[i2+1:]
n := len(s)
const INF = int(1e9)
ans := INF
occA := []int{}
if len(A) > 0 { occA = kmp(s, A) }
occB := []int{}
if len(B) > 0 { occB = kmp(s, B) }
occC := []int{}
if len(C) > 0 { occC = kmp(s, C) }
nextB := buildNext(occB, n)
nextC := buildNext(occC, n)
eval := func(st, pos int) int {
bStart := pos
if len(B) > 0 {
if pos > n || nextB[pos] < 0 { return INF }
bStart = nextB[pos]
}
afterB := bStart + len(B)
cStart := afterB
if len(C) > 0 {
if afterB > n || nextC[afterB] < 0 { return INF }
cStart = nextC[afterB]
}
return cStart + len(C) - st
}
if len(A) == 0 { for st := 0; st <= n; st++ { if v := eval(st, st); v < ans { ans = v } } } else {
for _, st := range occA { if v := eval(st, st+len(A)); v < ans { ans = v } }
}
if ans >= INF { return -1 }
return ans
}
func buildNext(occ []int, n int) []int {
nxt := make([]int, n+1)
for i := range nxt { nxt[i] = -1 }
j, cur := len(occ)-1, -1
for i := n; i >= 0; i-- {
for j >= 0 && occ[j] >= i { cur = occ[j]; j-- }
nxt[i] = cur
}
return nxt
}
func kmp(s, p string) []int {
m := len(p); lps := make([]int, m)
for i, j := 1, 0; i < m; i++ { for j > 0 && p[i] != p[j] { j = lps[j-1] }; if p[i] == p[j] { j++ }; lps[i] = j }
res := []int{}
for i, j := 0, 0; i < len(s); i++ { for j > 0 && s[i] != p[j] { j = lps[j-1] }; if s[i] == p[j] { j++ }; if j == m { res = append(res, i-m+1); j = lps[j-1] } }
return res
}class Solution {
vector kmp(const string& s, const string& p){
int n=s.size(), m=p.size(); vector lps(m,0), res;
for(int i=1,j=0;i buildNext(const vector& occ,int n){
vector nxt(n+1,-1); int j=(int)occ.size()-1,cur=-1;
for(int i=n;i>=0;i--){ while(j>=0&&occ[j]>=i){ cur=occ[j--]; } nxt[i]=cur; }
return nxt;
}
public:
int shortestMatchingSubstring(string s, string p) {
int i1=p.find('*'), i2=p.find('*',i1+1); string A=p.substr(0,i1), B=p.substr(i1+1,i2-i1-1), C=p.substr(i2+1);
int n=s.size(), INF=1e9, ans=INF;
vector occA=A.empty()?vector{}:kmp(s,A), occB=B.empty()?vector{}:kmp(s,B), occC=C.empty()?vector{}:kmp(s,C);
auto nextB=buildNext(occB,n), nextC=buildNext(occC,n);
auto eval=[&](int st,int pos){
int bStart=pos; if(!B.empty()){ if(pos>n||nextB[pos]<0) return INF; bStart=nextB[pos]; }
int afterB=bStart+(int)B.size(); int cStart=afterB; if(!C.empty()){ if(afterB>n||nextC[afterB]<0) return INF; cStart=nextC[afterB]; }
return cStart+(int)C.size()-st;
};
if(A.empty()) for(int st=0;st<=n;st++) ans=min(ans,eval(st,st)); else for(int st:occA) ans=min(ans,eval(st,st+(int)A.size()));
return ans>=INF?-1:ans;
}
}; class Solution:
def shortestMatchingSubstring(self, s: str, p: str) -> int:
i1 = p.find('*')
i2 = p.find('*', i1 + 1)
A, B, C = p[:i1], p[i1 + 1:i2], p[i2 + 1:]
n = len(s)
def kmp(text: str, pat: str):
m = len(pat)
lps = [0] * m
j = 0
for i in range(1, m):
while j and pat[i] != pat[j]:
j = lps[j - 1]
if pat[i] == pat[j]:
j += 1
lps[i] = j
res, j = [], 0
for i, ch in enumerate(text):
while j and ch != pat[j]:
j = lps[j - 1]
if ch == pat[j]:
j += 1
if j == m:
res.append(i - m + 1)
j = lps[j - 1]
return res
def build_next(occ):
nxt = [-1] * (n + 1)
j, cur = len(occ) - 1, -1
for i in range(n, -1, -1):
while j >= 0 and occ[j] >= i:
cur = occ[j]
j -= 1
nxt[i] = cur
return nxt
occA = kmp(s, A) if A else []
occB = kmp(s, B) if B else []
occC = kmp(s, C) if C else []
nextB, nextC = build_next(occB), build_next(occC)
INF = 10**9
def eval_one(st, pos):
b_start = pos
if B:
if pos > n or nextB[pos] < 0:
return INF
b_start = nextB[pos]
after_b = b_start + len(B)
c_start = after_b
if C:
if after_b > n or nextC[after_b] < 0:
return INF
c_start = nextC[after_b]
return c_start + len(C) - st
ans = INF
if A:
for st in occA:
ans = min(ans, eval_one(st, st + len(A)))
else:
for st in range(n + 1):
ans = min(ans, eval_one(st, st))
return -1 if ans >= INF else ansvar shortestMatchingSubstring = function(s, p) {
const i1 = p.indexOf('*');
const i2 = p.indexOf('*', i1 + 1);
const A = p.slice(0, i1), B = p.slice(i1 + 1, i2), C = p.slice(i2 + 1);
const n = s.length, INF = 1e9;
const kmp = (text, pat) => {
const m = pat.length, lps = Array(m).fill(0), res = [];
for (let i = 1, j = 0; i < m; i++) {
while (j > 0 && pat[i] !== pat[j]) j = lps[j - 1];
if (pat[i] === pat[j]) j++;
lps[i] = j;
}
for (let i = 0, j = 0; i < text.length; i++) {
while (j > 0 && text[i] !== pat[j]) j = lps[j - 1];
if (text[i] === pat[j]) j++;
if (j === m) { res.push(i - m + 1); j = lps[j - 1]; }
}
return res;
};
const buildNext = (occ) => {
const nxt = Array(n + 1).fill(-1);
let j = occ.length - 1, cur = -1;
for (let i = n; i >= 0; i--) {
while (j >= 0 && occ[j] >= i) cur = occ[j--];
nxt[i] = cur;
}
return nxt;
};
const occA = A.length ? kmp(s, A) : [];
const occB = B.length ? kmp(s, B) : [];
const occC = C.length ? kmp(s, C) : [];
const nextB = buildNext(occB), nextC = buildNext(occC);
const evalOne = (st, pos) => {
let bStart = pos;
if (B.length) { if (pos > n || nextB[pos] < 0) return INF; bStart = nextB[pos]; }
const afterB = bStart + B.length;
let cStart = afterB;
if (C.length) { if (afterB > n || nextC[afterB] < 0) return INF; cStart = nextC[afterB]; }
return cStart + C.length - st;
};
let ans = INF;
if (A.length) {
for (const st of occA) ans = Math.min(ans, evalOne(st, st + A.length));
} else {
for (let st = 0; st <= n; st++) ans = Math.min(ans, evalOne(st, st));
}
return ans >= INF ? -1 : ans;
};
Comments