您的位置:首页 > 娱乐 > 明星 > 【滚动哈希 二分查找】1044. 最长重复子串

【滚动哈希 二分查找】1044. 最长重复子串

2024/10/11 8:20:04 来源:https://blog.csdn.net/he_zhidan/article/details/139479744  浏览:    关键词:【滚动哈希 二分查找】1044. 最长重复子串

本文涉及知识点

滚动哈希
二分查找算法合集

LeetCode 1044. 最长重复子串

给你一个字符串 s ,考虑其所有 重复子串 :即 s 的(连续)子串,在 s 中出现 2 次或更多次。这些出现之间可能存在重叠。
返回 任意一个 可能具有最长长度的重复子串。如果 s 不含重复子串,那么答案为 “” 。
示例 1:
输入:s = “banana”
输出:“ana”
示例 2:
输入:s = “abcd”
输出:“”
提示:
2 <= s.length <= 3 * 10^4
s 由小写英文字母组成

二分查找+滚动哈希

令 Check(len) 返回 是否存在长度为len的重复字符串
len1 < len2,如果Check(len2)为true,则Check(len1)一定为true
即存在 len3,使得 len ∈ [0,len3] 时 Check(len) 为 true,len ∈ [len3+1,n] 时 Check(len) 为 false。
寻找最后一个 true,故用左闭右开区间。

Check函数

len 为 0 时,直接返回 true。
用滚动哈希计算 s[i…i+len-1] 的哈希值(要求 i + len <= s.length),并将哈希值记录到 set 中;如果出现重复的哈希值,返回 true,否则返回 false。

时间复杂度:O(nlogn)
二分查找:O(logn) Check函数O(n)

代码

核心代码

// Integer modulo MOD with the usual arithmetic operators.
// The stored value is always kept in the range [0, MOD).
// The converting constructor is intentionally implicit so that plain integers
// can be mixed into expressions such as `hash * P + code`.
template<int MOD = 1000000007>
class C1097Int
{
public:
    // Stores llData reduced modulo MOD; negative inputs are mapped into [0, MOD).
    C1097Int(long long llData = 0)
        : m_iData(static_cast<int>(((llData % MOD) + MOD) % MOD)) {}

    C1097Int operator+(const C1097Int& o) const
    {
        return C1097Int(static_cast<long long>(m_iData) + o.m_iData);
    }
    C1097Int& operator+=(const C1097Int& o)
    {
        m_iData = static_cast<int>((static_cast<long long>(m_iData) + o.m_iData) % MOD);
        return *this;
    }
    C1097Int& operator-=(const C1097Int& o)
    {
        // 64-bit intermediate: m_iData + MOD may not fit in int for large MOD.
        m_iData = static_cast<int>((static_cast<long long>(m_iData) + MOD - o.m_iData) % MOD);
        return *this;
    }
    C1097Int operator-(const C1097Int& o) const
    {
        return C1097Int(static_cast<long long>(m_iData) + MOD - o.m_iData);
    }
    C1097Int operator*(const C1097Int& o) const
    {
        return C1097Int(static_cast<long long>(m_iData) * o.m_iData);
    }
    C1097Int& operator*=(const C1097Int& o)
    {
        m_iData = static_cast<int>((static_cast<long long>(m_iData) * o.m_iData) % MOD);
        return *this;
    }
    // Division is multiplication by the modular inverse (see PowNegative1).
    C1097Int operator/(const C1097Int& o) const
    {
        return *this * o.PowNegative1();
    }
    C1097Int& operator/=(const C1097Int& o)
    {
        // Must multiply here: writing `*this /= ...` would recurse forever.
        *this *= o.PowNegative1();
        return *this;
    }
    bool operator==(const C1097Int& o) const
    {
        return m_iData == o.m_iData;
    }
    bool operator<(const C1097Int& o) const
    {
        return m_iData < o.m_iData;
    }
    // Returns (*this)^n by binary exponentiation. n is expected to be
    // non-negative; a negative n yields 1 instead of looping forever.
    C1097Int pow(long long n) const
    {
        C1097Int iRet = 1;
        C1097Int iCur = *this;
        while (n > 0)
        {
            if (n & 1)
            {
                iRet *= iCur;
            }
            iCur *= iCur;
            n >>= 1;
        }
        return iRet;
    }
    // Returns (*this)^(MOD-2), which is the modular inverse when MOD is prime
    // (Fermat's little theorem) and the value is non-zero.
    C1097Int PowNegative1() const
    {
        return pow(MOD - 2);
    }
    int ToInt() const
    {
        return m_iData;
    }

private:
    int m_iData = 0;
};

// Prefix table of a polynomial rolling hash modulo MOD.
// m_vHash[i] is the hash of the first i symbols and m_vP[i] is P^i, where P is
// the base chosen by the constructor.
template<int MOD = 1000000007>
class CHashStr {
public:
    // Hashes a string. Character c is encoded as (c - chBegin + iCodeBegin) and
    // the base is P = iCodeBegin + iCodeNum.
    // iCodeNum must be greater than or equal to the number of possible characters.
    CHashStr(const std::string& s, int iCodeNum, int iCodeBegin = 1, char chBegin = 'a')
        : m_c(static_cast<int>(s.length())), m_vP(m_c + 1), m_vHash(m_c + 1)
    {
        const int P = iCodeBegin + iCodeNum;
        m_vP[0] = 1;
        for (int i = 0; i < m_c; i++)
        {
            const int iCurCode = s[i] - chBegin + iCodeBegin;
            m_vHash[i + 1] = m_vHash[i] * P + iCurCode;
            m_vP[i + 1] = m_vP[i] * P;
        }
    }

    // Hashes an integer sequence of length len.
    // iMinValue is encoded as 0 and iMaxValue as iMaxValue - iMinValue;
    // the base is P = iMaxValue - iMinValue + 1.
    CHashStr(const int* data, int len, int iMinValue = 0, int iMaxValue = 9)
        : m_c(len), m_vP(m_c + 1), m_vHash(m_c + 1)
    {
        const int P = iMaxValue - iMinValue + 1;
        m_vP[0] = 1;
        for (int i = 0; i < m_c; i++)
        {
            const int iCurCode = data[i] - iMinValue;
            assert((iCurCode >= 0) && (iCurCode < P));
            m_vHash[i + 1] = m_vHash[i] * P + iCurCode;
            m_vP[i + 1] = m_vP[i] * P;
        }
    }

    // Hash of the closed index range [left, right] (both ends included).
    int GetHash(int left, int right) const
    {
        return (m_vHash[right + 1] - m_vHash[left] * m_vP[right - left + 1]).ToInt();
    }
    // Hash of the prefix [0, right] (right included).
    inline int GetHash(int right) const
    {
        return m_vHash[right + 1].ToInt();
    }
    // Hash of the half-open index range [left, right) (right excluded).
    int GetHashExincludeRight(int left, int right) const
    {
        return (m_vHash[right] - m_vHash[left] * m_vP[right - left]).ToInt();
    }
    // Hash of the prefix [0, right) (right excluded).
    inline int GetHashExincludeRight(int right) const
    {
        return m_vHash[right].ToInt();
    }

    int m_c;                              // number of hashed symbols
    std::vector<C1097Int<MOD>> m_vP;      // m_vP[i] == P^i
    std::vector<C1097Int<MOD>> m_vHash;   // m_vHash[i] == hash of the first i symbols
};

// Double hash: combines a CHashStr modulo 1000000007 with a CHashStr modulo MOD2
// into a single 64-bit value, computed as hash1 * (MOD2 + 1) + hash2.
template<int MOD2 = 1000000009>
class C2HashStr
{
public:
    // Hashes a string of lowercase letters: the first table encodes 'a'..'z'
    // as 1..26, the second as 0..25; both use base 27.
    C2HashStr(const std::string& s)
        : m_hash1(s, 26), m_hash2(s, 27, 0) {}

    // Hashes an integer sequence whose values lie in [iMinValue, iMaxValue].
    C2HashStr(const int* data, int len, int iMinValue = 0, int iMaxValue = 9)
        : m_hash1(data, len, iMinValue, iMaxValue), m_hash2(data, len, iMinValue, iMaxValue) {}

    // Combined hash of the closed range [left, right].
    long long GetHash(int left, int right) const
    {
        return Combine(m_hash1.GetHash(left, right), m_hash2.GetHash(left, right));
    }
    // Combined hash of the prefix [0, right].
    long long GetHash(int right) const
    {
        return Combine(m_hash1.GetHash(right), m_hash2.GetHash(right));
    }
    // Combined hash of the half-open range [left, right): left included, right excluded.
    long long GetHashExincludeRight(int left, int right) const
    {
        return Combine(m_hash1.GetHashExincludeRight(left, right),
                       m_hash2.GetHashExincludeRight(left, right));
    }
    // Combined hash of the prefix [0, right).
    long long GetHashExincludeRight(int right) const
    {
        return Combine(m_hash1.GetHashExincludeRight(right),
                       m_hash2.GetHashExincludeRight(right));
    }

private:
    // Packs both residues into one value; h2 < MOD2 + 1, so the pair is recoverable.
    static long long Combine(int h1, int h2)
    {
        return static_cast<long long>(h1) * (static_cast<long long>(MOD2) + 1) + h2;
    }

    CHashStr<> m_hash1;
    CHashStr<MOD2> m_hash2;
};

namespace NBinarySearch
{
    // Binary search over (left, rightInclue] for a predicate that is false on a
    // prefix and true afterwards. left is never tested; rightInclue is assumed
    // to satisfy pr. Returns the first index for which pr holds.
    // (The misspelled name is kept for compatibility with existing callers.)
    template<class INDEX_TYPE, class Pred>
    INDEX_TYPE FindFrist(INDEX_TYPE left, INDEX_TYPE rightInclue, Pred pr)
    {
        while (rightInclue - left > 1)
        {
            const auto mid = left + (rightInclue - left) / 2;
            if (pr(mid))
            {
                rightInclue = mid;
            }
            else
            {
                left = mid;
            }
        }
        return rightInclue;
    }

    // Binary search over [leftInclude, right) for a predicate that is true on a
    // prefix and false afterwards. leftInclude is assumed to satisfy pr and is
    // never tested; right is never tested. Returns the last index for which pr holds.
    template<class INDEX_TYPE, class Pred>
    INDEX_TYPE FindEnd(INDEX_TYPE leftInclude, INDEX_TYPE right, Pred pr)
    {
        while (right - leftInclude > 1)
        {
            const auto mid = leftInclude + (right - leftInclude) / 2;
            if (pr(mid))
            {
                leftInclude = mid;
            }
            else
            {
                right = mid;
            }
        }
        return leftInclude;
    }
}

class Solution {
public:
    // Returns one longest substring of s that occurs at least twice (occurrences
    // may overlap), or "" when no substring repeats.
    // Binary-searches the length; each candidate length is checked by collecting
    // the double hashes of all windows of that length in a hash set.
    // Equality of windows is decided by hash only (no character comparison).
    std::string longestDupSubstring(std::string s) {
        std::string ret;
        const int n = static_cast<int>(s.length());
        C2HashStr<> dh(s);

        // True when some substring of length len repeats; records it in ret.
        auto Check = [&](int len) {
            if (0 == len)
            {
                ret.clear();
                return true;
            }
            std::unordered_set<long long> setHas;
            setHas.reserve(static_cast<std::size_t>(n - len + 1));
            for (int i = 0; i + len <= n; i++)
            {
                const long long cur = dh.GetHashExincludeRight(i, i + len);
                // insert() reports whether the hash was already present.
                if (!setHas.insert(cur).second)
                {
                    ret = s.substr(i, len);
                    return true;
                }
            }
            return false;
        };

        // The last successful Check call is the one for the final answer length,
        // so ret already holds the result when the search ends.
        NBinarySearch::FindEnd(0, n + 1, Check);
        return ret;
    }
};

单元测试

// Generic equality assertion: hands both values straight to Assert::AreEqual.
template<class T1, class T2>
void AssertEx(const T1& t1, const T2& t2)
{
    Assert::AreEqual(t1, t2);
}

// Vector overload: the sizes are compared first, then the elements pairwise in order.
template<class T>
void AssertEx(const std::vector<T>& v1, const std::vector<T>& v2)
{
    Assert::AreEqual(v1.size(), v2.size());
    for (std::size_t idx = 0; idx < v1.size(); ++idx)
    {
        Assert::AreEqual(v1[idx], v2[idx]);
    }
}

// Order-insensitive comparison of two vectors of rows: both (by-value) copies
// are sorted, then compared size-first and row by row.
template<class T>
void AssertV2(std::vector<std::vector<T>> vv1, std::vector<std::vector<T>> vv2)
{
    std::sort(vv1.begin(), vv1.end());
    std::sort(vv2.begin(), vv2.end());
    Assert::AreEqual(vv1.size(), vv2.size());
    for (std::size_t row = 0; row < vv1.size(); ++row)
    {
        AssertEx(vv1[row], vv2[row]);
    }
}

namespace UnitTest
{
    // Input string shared by the test methods below.
    std::string s;

    TEST_CLASS(UnitTest)
    {
    public:
        // Example 1 from the problem statement.
        TEST_METHOD(TestMethod1)
        {
            s = "banana";
            const std::string res = Solution().longestDupSubstring(s);
            AssertEx(std::string("ana"), res);
        }
        // Example 2: no substring repeats.
        TEST_METHOD(TestMethod2)
        {
            s = "abcd";
            const std::string res = Solution().longestDupSubstring(s);
            AssertEx(std::string(""), res);
        }
        // Shortest allowed input with a repeat.
        TEST_METHOD(TestMethod3)
        {
            s = "aa";
            const std::string res = Solution().longestDupSubstring(s);
            AssertEx(std::string("a"), res);
        }
    };
}

扩展阅读

视频课程

先学简单的课程,请移步CSDN学院,听白银讲师(也就是鄙人)的讲解。
https://edu.csdn.net/course/detail/38771

如果你想快速形成战斗力,为老板分忧,请学习C#入职培训、C++入职培训等课程
https://edu.csdn.net/lecturer/6176

相关推荐

我想对大家说的话
《喜缺全书算法册》以原理、正确性证明、总结为主。
按类别查阅鄙人的算法文章,请点击《算法与数据汇总》。
有效学习:明确的目标 及时的反馈 拉伸区(难度合适) 专注
闻缺陷则喜(喜缺)是一个美好的愿望,早发现问题,早修改问题,给老板节约钱。
子墨子言之:事无终始,无务多业。也就是我们常说的专业的人做专业的事。
如果程序是一条龙,那算法就是它的眼睛。

测试环境

操作系统:win7 开发环境: VS2019 C++17
或者 操作系统:win10 开发环境: VS2022 C++17
如无特殊说明,本算法用**C++**实现。

版权声明:

本网仅为发布的内容提供存储空间,不对发表、转载的内容提供任何形式的保证。凡本网注明“来源:XXX网络”的作品,均转载自其它媒体,著作权归作者所有,商业转载请联系作者获得授权,非商业转载请注明出处。

我们尊重并感谢每一位作者,均已注明文章来源和作者。如因作品内容、版权或其它问题,请及时与我们联系,联系邮箱:809451989@qq.com,投稿邮箱:809451989@qq.com