本文主要是介绍leetcode 刷题视频(7) - 哈希表与字符串,希望对大家解决编程问题提供一定的参考价值,需要的开发者们随着小编来一起学习吧!
哈希表与字符串
预备知识
拉链法解决冲突,构造哈希表。
#include <iostream>
using namespace std;
struct ListNode {int val;ListNode *next;ListNode(int x) : val(x), next(nullptr) {}
};int hash_func(int key, int table_len) { return key % table_len; }
void insert(ListNode *hash_table[], ListNode *node, int table_len) {int hash_key = hash_func(node->val, table_len);node->next = hash_table[hash_key]->next;hash_table[hash_key] = node;
}
bool search(ListNode *hash_table[], int value, int table_len) {int hash_key = hash_func(value, table_len);ListNode *head = hash_table[hash_key];while (head) {if (head->val == value) {return true;}head = head->next;}return false;
}
TABLE_LEN取为质数,冲突比其它数字少。
STL的map
#include <iostream>
#include <map>
using namespace std;int main() {map<string, int> hash_map;string str1 = "abc";string str2 = "aaa";string str3 = "xxxxxx";hash_map[str1] = 1;hash_map[str2] = 2;hash_map[str3] = 100;if (hash_map.find(str1) != hash_map.end()) {printf("map[%s]=%d\n", str1.c_str(), hash_map[str1]);}for (auto &it : hash_map) {printf("hasp_map[%s]=%d\n", it.first.c_str(), it.second);}return 0;
}
问题1 最长回文串
Given a string s
which consists of lowercase or uppercase letters, return the length of the longest palindrome that can be built with those letters.
Letters are case sensitive, for example, "Aa"
is not considered a palindrome here.
Example 1:
Input: s = "abccccdd"
Output: 7
Explanation:
One longest palindrome that can be built is "dccaccd", whose length is 7.
Example 2:
Input: s = "a"
Output: 1
Example 3:
Input: s = "bb"
Output: 2
Constraints:
1 <= s.length <= 2000
s
consits of lower-case and/or upper-case English letters only.
链接:https://leetcode-cn.com/problems/longest-palindrome/
使用字符串s中的字符,任意组合(可以交换顺序),生成新字符串。若生成的字符串为回文字符串,需要除了中心字符,其余字符只要头部出现,尾部就要对应出现。
思路:
-
利用字符哈希方法,统计字符串中所有字符数量;
-
设置最长回文串偶数字符长度为 max_length = 0;
-
设置是否有中心点标记 flag = 0;
-
遍历每一个字符,字符数为 count,若 count 为偶数,则max_length+=count;若count为奇数,则max_length+=count-1,flag=1;
-
最终回文串长度为max_length+flag。
问题2 词语模式
Given a pattern
and a string s
, find if s
follows the same pattern.
Here follow means a full match, such that there is a bijection between a letter in pattern
and a non-empty word in s
.
Example 1:
Input: pattern = "abba", s = "dog cat cat dog"
Output: true
Example 2:
Input: pattern = "abba", s = "dog cat cat fish"
Output: false
Example 3:
Input: pattern = "aaaa", s = "dog cat cat dog"
Output: false
Example 4:
Input: pattern = "abba", s = "dog dog dog dog"
Output: false
Constraints:
1 <= pattern.length <= 300
pattern
contains only lower-case English letters.1 <= s.length <= 3000
s
contains only lower-case English letters and spaces' '
.s
does not contain any leading or trailing spaces.- All the words in
s
are separated by a single space.
链接:https://leetcode-cn.com/problems/word-pattern/
这个遇到的最大问题是:函数的输入一直搞混
pattern
和s
。(在字符串问题里遇到的都是这种错误)
另外pattern
和单词的数量必须相等;
只建立单向映射是不行的,比如只建立pattern
到单词的映射,如果新pattern
出现,那么会不知道单词是不是也是新出现;如果只建立单词到pattern
的映射,如果新单词出现,也不知道pattern
是不是新的。检测字符出现比检测单词出现代价小一些,所以后者好一些。
问题3 相同字符的词语分组
Given an array of strings strs
, group the anagrams together. You can return the answer in any order.
An Anagram is a word or phrase formed by rearranging the letters of a different word or phrase, typically using all the original letters exactly once.
Example 1:
Input: strs = ["eat","tea","tan","ate","nat","bat"]
Output: [["bat"],["nat","tan"],["ate","eat","tea"]]
Example 2:
Input: strs = [""]
Output: [[""]]
Example 3:
Input: strs = ["a"]
Output: [["a"]]
Constraints:
1 <= strs.length <= 10^4
0 <= strs[i].length <= 100
strs[i]
consists of lower-case English letters.
链接:https://leetcode-cn.com/problems/group-anagrams/
方法1 字符排序
思路:设置字符串到字符串向量的哈希表anagram,遍历输入的字符串列表strs中的单词strs[i]:
- 设置临时变量str=strs[i],对str进行排序
- 若str未出现在anagram中,设置str到一个空字符串向量的映射
- 将strs[i]添加至字符串向量anagram[str]中
class Solution {
public:vector<vector<string> > groupAnagrams(vector<string> &strs) {map<string, vector<string> > anagram;vector<vector<string> > result;for (auto &i : strs) {string str = i;sort(str.begin(), str.end());if (anagram.find(str) == anagram.end()) {vector<string> item;anagram[str] = item;}anagram[str].push_back(i);}result.reserve(anagram.size());for (auto &it : anagram) {result.push_back(it.second);}return result;}
};
方法2 统计字符数量
以字符到个数的vector为key,以字符串向量为value。
步骤:
- 统计各个字符数量,存储至vec;
- 若vec未出现在anagram中,则设置vec到一个空向量的映射;
- 将strs[i]添加到字符串向量anagram[vec]中;
问题4 无重复字符的最长子串
Given a string s
, find the length of the longest substring without repeating characters.
Example 1:
Input: s = "abcabcbb"
Output: 3
Explanation: The answer is "abc", with the length of 3.
Example 2:
Input: s = "bbbbb"
Output: 1
Explanation: The answer is "b", with the length of 1.
Example 3:
Input: s = "pwwkew"
Output: 3
Explanation: The answer is "wke", with the length of 3.
Notice that the answer must be a substring, "pwke" is a subsequence and not a substring.
Example 4:
Input: s = ""
Output: 0
Constraints:
0 <= s.length <= 5 * 104
s
consists of English letters, digits, symbols and spaces.
链接:https://leetcode-cn.com/problems/longest-substring-without-repeating-characters/
思路:
- 设置一个记录字符数量的字符哈希 char_map
- 设置一个记录当前满足条件的最长子串 word
- 设置两个指针 i 与 begin 指向字符串的第一个字符
- 设置最长满足条件的子串的长度 result
- i 指针向后逐个扫描字符串中的字符,在这个过程中,使用char_map记录字符数量;
- 如果word中没出现该字符,对word尾部添加字符并检查result是否需要更新
- 否则begin指针向前移动,更新char_map中的字符数量,直到s[i]的数量为1
class Solution {
public:int lengthOfLongestSubstring(string s) {int begin = 0;int result = 0;string word;int char_map[128] = {0};for (int i = 0; i < s.length(); i++) {char_map[s[i]]++;if (char_map[s[i]] == 1) {word += s[i];if (result < word.length()) {result = word.length();}} else {cout << char_map[s[begin]] << endl;while (/*begin < i &&*/char_map[s[i]] > 1) {char_map[s[begin]]--;begin++;}word = s.substr(begin, i - begin + 1); // substr(begin, length)}}return result;}
};
上面的代码效率太低了,更新一下稍微快点的
int lengthOfLongestSubstring(string s) {if (s.empty()) return 0;int winStart = 0; // 当前窗口的起点int maxLeft = 0, maxRight = 0; // 结果的左右边界vector<int> lastPosition(128, -1);lastPosition[s[0]] = 0; // 第一个字母特殊处理,标记本单词的位置for (int i = 1; i < s.size(); i++) { // 从第二个字母开始if (lastPosition[s[i]] >= 0) { // 如果之前存在这个字符// 则依次标记为-1int end = lastPosition[s[i]];for (int j = winStart; j <= end; j++) {lastPosition[s[j]] = -1;}winStart = end + 1; // 更新窗口起点}lastPosition[s[i]] = i;if (i - winStart > maxRight - maxLeft) {maxLeft = winStart;maxRight = i;}}return maxRight - maxLeft + 1;}
问题5 找出重复序列
All DNA is composed of a series of nucleotides abbreviated as 'A'
, 'C'
, 'G'
, and 'T'
, for example: "ACGAATTCCG"
. When studying DNA, it is sometimes useful to identify repeated sequences within the DNA.
Write a function to find all the 10-letter-long sequences (substrings) that occur more than once in a DNA molecule.
Example 1:
Input: s = "AAAAACCCCCAAAAACCCCCCAAAAAGGGTTT"
Output: ["AAAAACCCCC","CCCCCAAAAA"]
Example 2:
Input: s = "AAAAAAAAAAAAA"
Output: ["AAAAAAAAAA"]
Constraints:
0 <= s.length <= 105
s[i]
is'A'
,'C'
,'G'
, or'T'
.
找出长度为10且超过1次的字符串。
链接:https://leetcode-cn.com/problems/repeated-dna-sequences
方法1 简单思路
将所有长度为10的子串加入hash map中,并记录子串数量。
方法2 设计一个序列到int的映射
AGCT只有4种可能可以用2bit表示,10个字符可以用20bit表示,int还是能盛下的。
之后的思路和方法1一样。
本来映射应该使用map结构的,但是这道题使用数组结构竟然比map还省内存,震惊!
class Solution { public:vector<string> findRepeatedDnaSequences(string s) {if (s.size() <= 10) return {};vector<string> result;unsigned chMap[128];chMap['A'] = 0;chMap['C'] = 1;chMap['G'] = 2;chMap['T'] = 3;char state[2 << 20 + 1] = {0}; // 0表示未出现 1表示第一次出现,2表示多次出现 [如果用bitset存储会更省内存]// 处理第一个单词unsigned key = 0;for (int j = 0; j < 10; j++) {int index = 9 - j;key += chMap[s[index]] << (j * 2); // 之前少写了加号}state[key] = true;for (int i = 10; i < s.size(); i++) {key <<= 2; // 左移两位key &= 0xfffff; // 去掉高位key += chMap[s[i]];if (state[key] == 0) {state[key] = 1;} else {if (state[key] == 1) { // 仅处理第一次出现result.push_back(s.substr(i - 9, 10));state[key] = 2;}}}return result;} };
问题6 最小窗口子串
Given a string S and a string T, find the minimum window in S which will contain all the characters in T in complexity O(n).
Example:
Input: S = "ADOBECODEBANC", T = "ABC"
Output: "BANC"
Note:
- If there is no such window in S that covers all characters in T, return the empty string
""
. - If there is such window, you are guaranteed that there will always be only one unique minimum window in S.
链接:https://leetcode-cn.com/problems/minimum-window-substring/
class Solution {
public:string minWindow(string s, string t) {const int MAX_ARRAY_LEN = 128;int map_t[MAX_ARRAY_LEN] = {0};int map_s[MAX_ARRAY_LEN] = {0};vector<int> vec_t;// 统计T中有哪些字符for (char c : t) {map_t[c]++;}// 将T中出现的字符存储到vec_t中for (int i = 0; i < MAX_ARRAY_LEN; i++) {if (map_t[i] > 0)vec_t.push_back(i);}int windows_begin = 0;string result;for (int i = 0; i < s.length(); ++i) {map_s[s[i]]++;while (windows_begin < i) {char begin_ch = s[windows_begin];if (map_t[begin_ch] == 0) {windows_begin++;} else if (map_s[begin_ch] > map_t[begin_ch]) {map_s[begin_ch]--;windows_begin++;} else { // ms <= mt && mt != 0break;}}if (is_window_ok(map_s, map_t, vec_t)) {int new_window_len = i - windows_begin + 1;if (result.empty() || new_window_len < result.length()) {result = s.substr(windows_begin, new_window_len);}}}return result;}private:// 检查 s 是否包括 tbool is_window_ok(int map_s[], int map_t[], vector<int> &vec_t) {for (int i = 0; i < vec_t.size(); i++) {if (map_s[vec_t[i]] < map_t[vec_t[i]]) {return false;}}return true;}
};
leetcode 官方解法:
这篇关于leetcode 刷题视频(7) - 哈希表与字符串的文章就介绍到这儿,希望我们推荐的文章对编程师们有所帮助!