洛谷B3927题（2023年GESP四级）：哈希映射实现小杨的字典

2个月前 (08-11)比赛题解163

一、题目解读

洛谷B3927是一道考察字符串处理与哈希映射的经典算法题。题目要求实现一个简易的字典翻译系统：给定N组A/B语言单词映射，需要将A语言文章中的单词逐一翻译为B语言（未收录单词标记为"UNK"），同时保留原文标点符号不变。题目难点在于正确处理单词边界与标点符号的混合输入。

二、解题思路

核心算法采用哈希映射+逐字符扫描策略：

预处理阶段：使用STL的unordered_map建立A→B的高效字典查询结构（时间复杂度O(1)）
扫描阶段：设计字符分类器区分字母与标点符号（ASCII码范围判断）
缓冲机制：通过currentWord字符串动态缓存当前单词，遇到标点符号时触发翻译
边界处理：单独处理文章末尾可能遗留的未翻译单词

三、解题步骤

读取字典条目数N，构建哈希字典
逐行读入A/B语言单词对存入unordered_map
读入待翻译文章，初始化结果字符串和单词缓存
遍历文章每个字符：

字母字符：追加到currentWord
标点符号：先处理缓存的单词（查询字典→追加结果），再追加标点

输出最终翻译结果

四、完整代码实现（含注释）

#include <iostream>
#include <string>
#include <unordered_map>
using namespace std;

// 判断字符是否为标点符号
bool isPunctuation(char c) {
    return c < 'a' || c > 'z';
}

int main() {
    long long N;
    cin >> N;
    
    // 创建字典，存储A语言到B语言的映射
    unordered_map<string, string> dictionary;
    
    // 读取字典条目
    for (int i = 0; i < N; i++) {
        string a, b;
        cin >> a >> b;
        dictionary[a] = b;
    }
    
    string article,result;
    cin>>article;
    
    string currentWord; // 当前正在处理的单词
    
    for (char c : article) {
        if (isPunctuation(c)) {
            // 遇到标点符号，处理当前单词
            if (!currentWord.empty()) {
                // 查找字典
                if (dictionary.count(currentWord)) {
                    result += dictionary[currentWord];
                } else {
                    result += "UNK";
                }
                currentWord.clear();
            }
            result += c; // 添加标点符号
        } else {
            // 非标点符号，添加到当前单词
            currentWord += c;
        }
    }
    
    // 处理文章末尾可能剩余的单词
    if (!currentWord.empty()) {
        if (dictionary.count(currentWord)) {
            result += dictionary[currentWord];
        } else {
            result += "UNK";
        }
    }
    
    cout << result << endl;
    return 0;
}