LeetCode 929: Unique Email Addresses (Normalize Local Name Rules + Hash Set)
Tags: LeetCode 929 · String · Hash Set
Today we solve LeetCode 929 - Unique Email Addresses.
Source: https://leetcode.com/problems/unique-email-addresses/
English
Problem Summary
Given a list of email addresses, apply Gmail-like rules to the local name: ignore every dot (.), and ignore the first plus sign (+) together with everything after it. The domain part stays unchanged. Return how many unique normalized addresses remain.
Key Insight
Each original email can be transformed into a canonical string. Once canonicalized, the problem becomes straightforward deduplication with a hash set.
Algorithm
- For each email, split at @ into local and domain.
- In local, stop at first + if it exists.
- Remove all dots from the kept local part.
- Rebuild canonical address as normalizedLocal + "@" + domain and insert into a set.
- Final answer is set size.
Complexity Analysis
Time: O(total_chars).
Space: O(total_chars) for stored canonical addresses.
Common Pitfalls
- Applying plus/dot rules to the domain part (wrong).
- Cutting at the wrong plus: everything from the first + onward is discarded, so truncating at the first occurrence already takes care of any later + signs.
- Forgetting that dots are ignored only in local part.
Reference Implementations (Java / Go / C++ / Python / JavaScript)
class Solution {
/**
 * Counts how many distinct addresses remain after normalizing each email.
 *
 * Normalization touches only the local name (the text before '@'):
 * everything from the first '+' onward is dropped, then every '.' is removed.
 * The domain is kept exactly as given.
 *
 * @param emails the raw addresses; each is expected to contain an '@'
 * @return the number of distinct normalized addresses
 */
public int numUniqueEmails(String[] emails) {
    Set<String> canonical = new HashSet<>();
    for (String address : emails) {
        int atIdx = address.indexOf('@');
        String localPart = address.substring(0, atIdx);
        // Keep the '@' attached to the domain so it can be appended in one step.
        String atAndDomain = address.substring(atIdx);

        // Only the prefix before the first '+' takes part in the canonical form.
        int plusIdx = localPart.indexOf('+');
        String kept = (plusIdx == -1) ? localPart : localPart.substring(0, plusIdx);

        // Copy the kept prefix while skipping dots.
        StringBuilder key = new StringBuilder(address.length());
        for (int i = 0; i < kept.length(); i++) {
            char ch = kept.charAt(i);
            if (ch != '.') {
                key.append(ch);
            }
        }
        key.append(atAndDomain);
        canonical.add(key.toString());
    }
    return canonical.size();
}
}
func numUniqueEmails(emails []string) int {
seen := map[string]struct{}{}
for _, email := range emails {
at := strings.IndexByte(email, '@')
local := email[:at]
domain := email[at+1:]
if plus := strings.IndexByte(local, '+'); plus != -1 {
local = local[:plus]
}
local = strings.ReplaceAll(local, ".", "")
seen[local+"@"+domain] = struct{}{}
}
return len(seen)
}
class Solution {
public:
// Counts how many distinct addresses remain after normalizing each email.
//
// Normalization touches only the local name (the text before '@'):
// everything from the first '+' onward is dropped, then every '.' is removed.
// The domain is kept exactly as given.
//
// emails: the raw addresses; each is expected to contain an '@'.
// Returns the number of distinct normalized addresses.
int numUniqueEmails(std::vector<std::string>& emails) {
    std::unordered_set<std::string> seen;
    for (const std::string& email : emails) {
        // find() returns size_type; keeping that type avoids narrowing to int
        // and makes the comparison against npos exact.
        const std::string::size_type at = email.find('@');
        std::string local = email.substr(0, at);
        const std::string domain = email.substr(at + 1);

        // Drop the first '+' and everything after it.
        const std::string::size_type plus = local.find('+');
        if (plus != std::string::npos) {
            local.erase(plus);
        }

        // Copy the remaining local name while skipping dots.
        std::string normalized;
        normalized.reserve(local.size());
        for (char c : local) {
            if (c != '.') {
                normalized.push_back(c);
            }
        }
        seen.insert(normalized + "@" + domain);
    }
    return static_cast<int>(seen.size());
}
};
class Solution:
def numUniqueEmails(self, emails: List[str]) -> int:
seen = set()
for email in emails:
local, domain = email.split('@')
local = local.split('+', 1)[0].replace('.', '')
seen.add(f"{local}@{domain}")
return len(seen)
var numUniqueEmails = function(emails) {
const seen = new Set();
for (const email of emails) {
const at = email.indexOf('@');
let local = email.slice(0, at);
const domain = email.slice(at + 1);
const plus = local.indexOf('+');
if (plus !== -1) {
local = local.slice(0, plus);
}
local = local.replace(/\./g, '');
seen.add(local + '@' + domain);
}
return seen.size;
};
中文
题目概述
给定一组邮箱地址,按规则规范化本地名:忽略点号 .,并忽略第一个加号 + 之后的内容。域名部分保持不变。返回规范化后不同邮箱的数量。
核心思路
把每个邮箱先转换为“标准形态”字符串,再放进哈希集合去重即可。问题本质是字符串标准化 + 去重。
算法步骤
- 对每个邮箱按 @ 分割成 local 和 domain。
- local 遇到第一个 + 后截断。
- 删除截断后 local 中的所有点号 .。
- 拼成 normalizedLocal + "@" + domain 放入集合。
- 最终答案是集合大小。
复杂度分析
时间复杂度:O(总字符数)。
空间复杂度:O(总字符数)(用于存储规范化结果)。
常见陷阱
- 错把加号/点号规则应用到 domain(错误)。
- 忘记“只处理 local 部分”。
- 字符串截断顺序写错导致结果不一致。
多语言参考实现(Java / Go / C++ / Python / JavaScript)
class Solution {
/**
 * Counts how many distinct addresses remain after normalizing each email.
 *
 * Normalization touches only the local name (the text before '@'):
 * everything from the first '+' onward is dropped, then every '.' is removed.
 * The domain is kept exactly as given.
 *
 * @param emails the raw addresses; each is expected to contain an '@'
 * @return the number of distinct normalized addresses
 */
public int numUniqueEmails(String[] emails) {
    Set<String> canonical = new HashSet<>();
    for (String address : emails) {
        int atIdx = address.indexOf('@');
        String localPart = address.substring(0, atIdx);
        // Keep the '@' attached to the domain so it can be appended in one step.
        String atAndDomain = address.substring(atIdx);

        // Only the prefix before the first '+' takes part in the canonical form.
        int plusIdx = localPart.indexOf('+');
        String kept = (plusIdx == -1) ? localPart : localPart.substring(0, plusIdx);

        // Copy the kept prefix while skipping dots.
        StringBuilder key = new StringBuilder(address.length());
        for (int i = 0; i < kept.length(); i++) {
            char ch = kept.charAt(i);
            if (ch != '.') {
                key.append(ch);
            }
        }
        key.append(atAndDomain);
        canonical.add(key.toString());
    }
    return canonical.size();
}
}
func numUniqueEmails(emails []string) int {
seen := map[string]struct{}{}
for _, email := range emails {
at := strings.IndexByte(email, '@')
local := email[:at]
domain := email[at+1:]
if plus := strings.IndexByte(local, '+'); plus != -1 {
local = local[:plus]
}
local = strings.ReplaceAll(local, ".", "")
seen[local+"@"+domain] = struct{}{}
}
return len(seen)
}
class Solution {
public:
// Counts how many distinct addresses remain after normalizing each email.
//
// Normalization touches only the local name (the text before '@'):
// everything from the first '+' onward is dropped, then every '.' is removed.
// The domain is kept exactly as given.
//
// emails: the raw addresses; each is expected to contain an '@'.
// Returns the number of distinct normalized addresses.
int numUniqueEmails(std::vector<std::string>& emails) {
    std::unordered_set<std::string> seen;
    for (const std::string& email : emails) {
        // find() returns size_type; keeping that type avoids narrowing to int
        // and makes the comparison against npos exact.
        const std::string::size_type at = email.find('@');
        std::string local = email.substr(0, at);
        const std::string domain = email.substr(at + 1);

        // Drop the first '+' and everything after it.
        const std::string::size_type plus = local.find('+');
        if (plus != std::string::npos) {
            local.erase(plus);
        }

        // Copy the remaining local name while skipping dots.
        std::string normalized;
        normalized.reserve(local.size());
        for (char c : local) {
            if (c != '.') {
                normalized.push_back(c);
            }
        }
        seen.insert(normalized + "@" + domain);
    }
    return static_cast<int>(seen.size());
}
};
class Solution:
def numUniqueEmails(self, emails: List[str]) -> int:
seen = set()
for email in emails:
local, domain = email.split('@')
local = local.split('+', 1)[0].replace('.', '')
seen.add(f"{local}@{domain}")
return len(seen)
var numUniqueEmails = function(emails) {
const seen = new Set();
for (const email of emails) {
const at = email.indexOf('@');
let local = email.slice(0, at);
const domain = email.slice(at + 1);
const plus = local.indexOf('+');
if (plus !== -1) {
local = local.slice(0, plus);
}
local = local.replace(/\./g, '');
seen.add(local + '@' + domain);
}
return seen.size;
};
Comments