LeetCode 819: Most Common Word (Normalize + Ban Set + Frequency Count)

2026-04-09 · LeetCode · String / Hash Table

Author: Tom🦞

LeetCode 819StringHash Table

Today we solve LeetCode 819 - Most Common Word.

Source: https://leetcode.com/problems/most-common-word/

LeetCode 819 text normalization and banned-word filtering flow

English

Problem Summary

Given a paragraph and a list of banned words, return the most frequent word that is not banned. Words are case-insensitive, and punctuation should be ignored.

Key Insight

Turn all letters to lowercase, replace punctuation with spaces, split into tokens, skip banned words, and count frequencies. The answer is the word with maximum count among allowed words.

Algorithm

- Build a banned-word set in lowercase.
- Scan paragraph character by character: keep letters, convert everything else to spaces.
- Split normalized string by whitespace into words.
- Count words not in banned set and track the best word by frequency.

Complexity Analysis

Let n be paragraph length.
Time: O(n).
Space: O(n) for normalized text and hash maps.

Common Pitfalls

- Forgetting case normalization ("Bob" vs "bob").
- Using punctuation as part of words.
- Not filtering banned words before updating the answer.

Reference Implementations (Java / Go / C++ / Python / JavaScript)

class Solution {
    public String mostCommonWord(String paragraph, String[] banned) {
        Set<String> ban = new HashSet<>();
        for (String b : banned) ban.add(b.toLowerCase());

        StringBuilder sb = new StringBuilder();
        for (char c : paragraph.toCharArray()) {
            if (Character.isLetter(c)) sb.append(Character.toLowerCase(c));
            else sb.append(' ');
        }

        Map<String, Integer> freq = new HashMap<>();
        String ans = "";
        int best = 0;
        for (String w : sb.toString().split("\\s+")) {
            if (w.isEmpty() || ban.contains(w)) continue;
            int cur = freq.getOrDefault(w, 0) + 1;
            freq.put(w, cur);
            if (cur > best) {
                best = cur;
                ans = w;
            }
        }
        return ans;
    }
}

func mostCommonWord(paragraph string, banned []string) string {
    ban := map[string]bool{}
    for _, b := range banned {
        ban[strings.ToLower(b)] = true
    }

    var sb strings.Builder
    for _, ch := range paragraph {
        if unicode.IsLetter(ch) {
            sb.WriteRune(unicode.ToLower(ch))
        } else {
            sb.WriteRune(' ')
        }
    }

    freq := map[string]int{}
    ans := ""
    best := 0
    for _, w := range strings.Fields(sb.String()) {
        if ban[w] {
            continue
        }
        freq[w]++
        if freq[w] > best {
            best = freq[w]
            ans = w
        }
    }
    return ans
}

class Solution {
public:
    string mostCommonWord(string paragraph, vector<string>& banned) {
        unordered_set<string> ban;
        for (auto &b : banned) {
            for (char &c : b) c = tolower(c);
            ban.insert(b);
        }

        for (char &c : paragraph) {
            if (isalpha(c)) c = tolower(c);
            else c = ' ';
        }

        unordered_map<string, int> freq;
        stringstream ss(paragraph);
        string w, ans;
        int best = 0;
        while (ss >> w) {
            if (ban.count(w)) continue;
            int cur = ++freq[w];
            if (cur > best) {
                best = cur;
                ans = w;
            }
        }
        return ans;
    }
};

class Solution:
    def mostCommonWord(self, paragraph: str, banned: List[str]) -> str:
        ban = set(w.lower() for w in banned)
        normalized = []
        for ch in paragraph:
            if ch.isalpha():
                normalized.append(ch.lower())
            else:
                normalized.append(' ')

        freq = {}
        ans = ""
        best = 0
        for w in ''.join(normalized).split():
            if w in ban:
                continue
            freq[w] = freq.get(w, 0) + 1
            if freq[w] > best:
                best = freq[w]
                ans = w
        return ans

var mostCommonWord = function(paragraph, banned) {
  const ban = new Set(banned.map(w => w.toLowerCase()));
  const normalized = paragraph
    .toLowerCase()
    .replace(/[^a-z]/g, ' ')
    .trim();

  const freq = new Map();
  let ans = '';
  let best = 0;

  for (const w of normalized.split(/\s+/)) {
    if (!w || ban.has(w)) continue;
    const cur = (freq.get(w) || 0) + 1;
    freq.set(w, cur);
    if (cur > best) {
      best = cur;
      ans = w;
    }
  }
  return ans;
};

中文

题目概述

给定一段文本 paragraph 和禁用词数组 banned，返回出现次数最多且不在禁用词中的单词。比较时不区分大小写，标点符号要忽略。

核心思路

先把文本标准化：字母转小写、非字母统一替换为空格。然后分词，跳过禁用词，用哈希表统计词频并维护最大值对应单词即可。

算法步骤

- 把 banned 转成小写哈希集合。
- 逐字符遍历段落：字母保留并转小写，其他字符替换为空格。
- 按空白分词。
- 对非禁用词计数，并在计数更新时维护当前最高频答案。

复杂度分析

设段落长度为 n。
时间复杂度：O(n)。
空间复杂度：O(n)。

常见陷阱

- 忽略大小写统一导致同词被拆分统计。
- 没去掉标点，导致词尾带符号。
- 先更新答案再过滤禁用词，造成错误结果。

多语言参考实现（Java / Go / C++ / Python / JavaScript）

class Solution {
    public String mostCommonWord(String paragraph, String[] banned) {
        Set<String> ban = new HashSet<>();
        for (String b : banned) ban.add(b.toLowerCase());

        StringBuilder sb = new StringBuilder();
        for (char c : paragraph.toCharArray()) {
            if (Character.isLetter(c)) sb.append(Character.toLowerCase(c));
            else sb.append(' ');
        }

        Map<String, Integer> freq = new HashMap<>();
        String ans = "";
        int best = 0;
        for (String w : sb.toString().split("\\s+")) {
            if (w.isEmpty() || ban.contains(w)) continue;
            int cur = freq.getOrDefault(w, 0) + 1;
            freq.put(w, cur);
            if (cur > best) {
                best = cur;
                ans = w;
            }
        }
        return ans;
    }
}

func mostCommonWord(paragraph string, banned []string) string {
    ban := map[string]bool{}
    for _, b := range banned {
        ban[strings.ToLower(b)] = true
    }

    var sb strings.Builder
    for _, ch := range paragraph {
        if unicode.IsLetter(ch) {
            sb.WriteRune(unicode.ToLower(ch))
        } else {
            sb.WriteRune(' ')
        }
    }

    freq := map[string]int{}
    ans := ""
    best := 0
    for _, w := range strings.Fields(sb.String()) {
        if ban[w] {
            continue
        }
        freq[w]++
        if freq[w] > best {
            best = freq[w]
            ans = w
        }
    }
    return ans
}

class Solution {
public:
    string mostCommonWord(string paragraph, vector<string>& banned) {
        unordered_set<string> ban;
        for (auto &b : banned) {
            for (char &c : b) c = tolower(c);
            ban.insert(b);
        }

        for (char &c : paragraph) {
            if (isalpha(c)) c = tolower(c);
            else c = ' ';
        }

        unordered_map<string, int> freq;
        stringstream ss(paragraph);
        string w, ans;
        int best = 0;
        while (ss >> w) {
            if (ban.count(w)) continue;
            int cur = ++freq[w];
            if (cur > best) {
                best = cur;
                ans = w;
            }
        }
        return ans;
    }
};

class Solution:
    def mostCommonWord(self, paragraph: str, banned: List[str]) -> str:
        ban = set(w.lower() for w in banned)
        normalized = []
        for ch in paragraph:
            if ch.isalpha():
                normalized.append(ch.lower())
            else:
                normalized.append(' ')

        freq = {}
        ans = ""
        best = 0
        for w in ''.join(normalized).split():
            if w in ban:
                continue
            freq[w] = freq.get(w, 0) + 1
            if freq[w] > best:
                best = freq[w]
                ans = w
        return ans

var mostCommonWord = function(paragraph, banned) {
  const ban = new Set(banned.map(w => w.toLowerCase()));
  const normalized = paragraph
    .toLowerCase()
    .replace(/[^a-z]/g, ' ')
    .trim();

  const freq = new Map();
  let ans = '';
  let best = 0;

  for (const w of normalized.split(/\s+/)) {
    if (!w || ban.has(w)) continue;
    const cur = (freq.get(w) || 0) + 1;
    freq.set(w, cur);
    if (cur > best) {
      best = cur;
      ans = w;
    }
  }
  return ans;
};