AC自动机加强版 uva 1449 - Dominating Patterns

本文主要是介绍AC自动机加强版 uva 1449 - Dominating Patterns，希望对大家解决编程问题提供一定的参考价值，需要的开发者们随着小编来一起学习吧！

AC自动机最常见的一个应用例子是：给出n个单词，再给出一段包含m个字符的文章，让你找出有多少个单词在文章里出现过。

当然这不是AC自动机的全部作用。

本文就是一例：给出若干个单词，查询在text里出现次数最多的单词；如果不唯一，按输入次序全部输出。

AC自动机是我刚刚学的，自己还没有能力独立修改，于是参考了别人的代码，在自己的模板上做了修改。

先看题目http://uva.onlinejudge.org/index.php?option=com_onlinejudge&Itemid=8&category=505&page=show_problem&problem=4195

再看我自己的AC模板（修改前的）

/*************************************************/
// Aho-Corasick automaton template, by Pilgrim
//
// MAXLEN   capacity of the text buffer `str`
// MAXTRIE  capacity of the node pool `Trie` and of the BFS queue `q`
// WORDLEN  capacity of the keyword buffer `keyword`
// KIND     alphabet size; characters are mapped with (c - 'a')
//
// Usage order: Init_AC() -> Insert() for every keyword -> Build_AC()
//              -> Query().
// Query() is destructive (it overwrites cnt with -1 on the nodes it
// visits), so the whole sequence must be repeated before querying again.
/*************************************************/
#include <cstring>

#define MAXLEN 1000010
#define MAXTRIE 500010
#define WORDLEN 51
#define KIND 26

char str[MAXLEN];       /* text that Query() scans */
char keyword[WORDLEN];  /* scratch buffer for reading one keyword */

struct Node {
    Node *fail;         /* failure link; NULL only for the root */
    Node *next[KIND];   /* children of this node, indexed by (c - 'a') */
    int cnt;            /* number of inserted keywords ending here;
                           Query() sets it to -1 once the node is visited */

    /* Reset the node to an empty, unlinked state. */
    void clr()
    {
        fail = NULL;
        cnt = 0;
        /* sizeof(next) is already the size of the whole array in bytes.
           Multiplying it by KIND would write far past the end of the node. */
        memset(next, 0, sizeof(next));
    }
} Trie[MAXTRIE], *q[MAXTRIE], *root, *cur;
/* Trie: node pool.  q: BFS queue used by Build_AC().
   root: Trie[0].    cur: most recently allocated node of the pool. */

int head, tail;         /* front / back index of q; both 0 after Init_AC() */

/*
 * Insert keyword `s` into the trie.
 *
 * Precondition: every character of `s` is in 'a'..'z' (the index is
 * computed as s[i] - 'a' with no range check) and the node pool has room
 * left (no capacity check is made on `cur`).
 */
void Insert(const char s[])
{
    Node *p = root;
    for (size_t i = 0, n = strlen(s); i < n; i++) {
        int idx = s[i] - 'a';
        if (p->next[idx] == NULL) {
            /* Allocate only when the edge is missing, so that keywords
               sharing a prefix ("he", "her") share one branch. */
            p->next[idx] = ++cur;
            p->next[idx]->clr();
        }
        p = p->next[idx];
    }
    /* Increment instead of assigning 1, so inserting the same keyword
       twice is counted twice by Query(). */
    p->cnt++;
}

/*
 * Compute the failure link of every node by a breadth-first traversal
 * starting at the root. Children of the root fail to the root; any other
 * node fails to the deepest node reachable through its parent's failure
 * chain that has an edge with the same character, or to the root if none
 * exists.
 */
void Build_AC()
{
    root->fail = NULL;
    q[tail++] = root;
    while (head != tail) {
        Node *p = q[head++];
        for (int i = 0; i < KIND; i++) {
            Node *child = p->next[i];
            if (child == NULL)
                continue;
            q[tail++] = child;

            /* For p == root this starts at NULL, so child->fail = root. */
            Node *tmp = p->fail;
            while (tmp != NULL && tmp->next[i] == NULL)
                tmp = tmp->fail;
            child->fail = (tmp != NULL) ? tmp->next[i] : root;
        }
    }
}

/*
 * Scan `str` and return the sum of cnt over all trie nodes that are matched
 * at least once, i.e. the number of inserted keywords (duplicates counted
 * separately) that occur somewhere in the text.
 *
 * Precondition: every character of `str` is in 'a'..'z'.
 */
int Query()
{
    int ans = 0;
    Node *p = root;
    for (size_t i = 0, n = strlen(str); i < n; i++) {
        int idx = str[i] - 'a';

        /* Follow failure links until some state has an edge for this
           character, or until we are back at the root. */
        while (p->next[idx] == NULL && p != root)
            p = p->fail;
        p = p->next[idx];
        if (p == NULL)      /* the root has no such edge either */
            p = root;

        /* p stays where it is; tmp walks the suffixes of the current match.
           Visited nodes are marked with cnt = -1: everything beyond a marked
           node has already been counted, so the walk can stop there, and no
           keyword is counted twice ("her" in "herher" counts once). */
        for (Node *tmp = p; tmp != root && tmp->cnt != -1; tmp = tmp->fail) {
            ans += tmp->cnt;
            tmp->cnt = -1;
        }
    }
    return ans;
}

/* Reset the automaton to a single empty root node and empty the BFS queue. */
void Init_AC()
{
    cur = root = Trie;
    root->clr();
    head = tail = 0;
}

首先分析题目需要处理的几个问题：

1、计数。

AC自动机为了不重复计数，有这么一句tmp->cnt=-1;

这一句肯定要改掉

另外，注意即使不是单词结尾，也可以进入这个循环，而每次进入这个循环，如果/*(2)*/这里处理不当，肯定会多计数

    while(tmp!=root && tmp->cnt!=-1)/*(1)*/{                               ans+=tmp->cnt;              tmp->cnt=-1;/*(2)*/tmp=tmp->fail;//指针移向下个字符继续匹配}

需要做两处修改：/*(1)*/处，只有 tmp->cnt>=1 的节点才计数（注意：这个判断应当放在循环体内，而不能作为循环条件，否则一遇到不是单词结尾的节点循环就会提前退出，漏掉 fail 链上更短的单词）；/*(2)*/处，改为用数组记录每个单词的出现次数。

2、当有些word出现次数相同的时候，怎么输出所有的。

答案是，记录最大的出现次数mmax，然后扫记录所有单词出现次数的数组，只要跟mmax相同，就输出。

最后上代码

#include <algorithm>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <map>

using namespace std;

#define MAXLEN 1000010  /* capacity of the text buffer `str` */
#define MAXTRIE 500010  /* capacity of the node pool and of the BFS queue */
#define WORDLEN 71      /* capacity of one keyword, terminator included */
#define KIND 26         /* alphabet size; characters map to (c - 'a') */
#define N 201           /* maximum number of keywords per test case */

char str[MAXLEN], keyword[WORDLEN];
char all[N][WORDLEN];   /* keywords in input order */
int num[N];             /* num[id]: occurrences in the text of the keyword
                           whose trie node carries `id` */
int first_id[N];        /* first_id[i]: id stored in the trie node of
                           keyword i; differs from i only when keyword i
                           repeats an earlier keyword */

struct Node {
    Node *fail;         /* failure link; NULL only for the root */
    Node *next[KIND];   /* children of this node, indexed by (c - 'a') */
    int cnt;            /* number of inserted keywords ending here */
    int id;             /* id of the first keyword ending here, or -1 */

    /* Reset the node to an empty, unlinked state. */
    void clr()
    {
        fail = NULL;
        cnt = 0;
        id = -1;
        /* sizeof(next) is already the size of the whole array in bytes.
           Multiplying it by KIND would write far past the end of the node. */
        memset(next, 0, sizeof(next));
    }
} Trie[MAXTRIE], *q[MAXTRIE], *root, *cur;
/* Trie: node pool.  q: BFS queue used by Build_AC().
   root: Trie[0].    cur: most recently allocated node of the pool. */

int head, tail;         /* front / back index of q; both 0 after Init_AC() */

/*
 * Insert keyword `s` with input index `id` into the trie.
 *
 * If the same keyword was inserted before, the node keeps the id of the
 * first insertion and first_id[id] records it, so that every copy can later
 * report the shared occurrence count.
 *
 * Precondition: every character of `s` is in 'a'..'z', 0 <= id < N, and the
 * node pool has room left (no capacity check is made on `cur`).
 */
void Insert(const char s[], int id)
{
    Node *p = root;
    for (size_t i = 0, n = strlen(s); i < n; i++) {
        int idx = s[i] - 'a';
        if (p->next[idx] == NULL) {
            /* Allocate only when the edge is missing, so that keywords
               sharing a prefix ("he", "her") share one branch. */
            p->next[idx] = ++cur;
            p->next[idx]->clr();
        }
        p = p->next[idx];
    }
    p->cnt++;
    if (p->id == -1)
        p->id = id;
    first_id[id] = p->id;
}

/*
 * Compute the failure link of every node by a breadth-first traversal
 * starting at the root. Children of the root fail to the root; any other
 * node fails to the deepest node reachable through its parent's failure
 * chain that has an edge with the same character, or to the root if none
 * exists.
 */
void Build_AC()
{
    root->fail = NULL;
    q[tail++] = root;
    while (head != tail) {
        Node *p = q[head++];
        for (int i = 0; i < KIND; i++) {
            Node *child = p->next[i];
            if (child == NULL)
                continue;
            q[tail++] = child;

            /* For p == root this starts at NULL, so child->fail = root. */
            Node *tmp = p->fail;
            while (tmp != NULL && tmp->next[i] == NULL)
                tmp = tmp->fail;
            child->fail = (tmp != NULL) ? tmp->next[i] : root;
        }
    }
}

/*
 * Scan `str`, adding one to num[id] for every occurrence of every keyword.
 * Returns the total number of keyword occurrences, with duplicate keywords
 * counted once per copy.
 *
 * Precondition: every character of `str` is in 'a'..'z'.
 */
int Query()
{
    int ans = 0;
    Node *p = root;
    for (size_t i = 0, n = strlen(str); i < n; i++) {
        int idx = str[i] - 'a';

        /* Follow failure links until some state has an edge for this
           character, or until we are back at the root. */
        while (p->next[idx] == NULL && p != root)
            p = p->fail;
        p = p->next[idx];
        if (p == NULL)      /* the root has no such edge either */
            p = root;

        /* p stays where it is; tmp walks every suffix of the current match.
           The walk must go all the way to the root: stopping at the first
           node with cnt == 0 would miss shorter keywords that are only
           reachable through a non-terminal node. */
        for (Node *tmp = p; tmp != root; tmp = tmp->fail) {
            if (tmp->cnt >= 1) {    /* a keyword ends here, so id is set */
                ans += tmp->cnt;
                num[tmp->id]++;
            }
        }
    }
    return ans;
}

/* Reset the automaton to a single empty root node, empty the BFS queue and
   clear the per-keyword occurrence counters. */
void Init_AC()
{
    cur = root = Trie;
    root->clr();
    head = tail = 0;
    memset(num, 0, sizeof(num));
}

/*
 * For each test case: read n keywords and a text, then print the largest
 * occurrence count followed by every keyword reaching it, in input order.
 * Input ends at n == 0, at end of file, or on malformed input.
 */
int main()
{
    int n;
    /* n is bounded by N because all[], num[] and first_id[] hold N entries. */
    while (scanf("%d", &n) == 1 && n > 0 && n <= N) {
        Init_AC();
        for (int i = 0; i < n; i++) {
            /* Field width is WORDLEN - 1, leaving room for the terminator. */
            if (scanf("%70s", keyword) != 1)
                return 0;
            strcpy(all[i], keyword);
            Insert(keyword, i);
        }
        /* Field width is MAXLEN - 1. */
        if (scanf("%1000009s", str) != 1)
            return 0;
        Build_AC();
        Query();

        int mmax = 0;
        for (int i = 0; i < n; i++)
            mmax = max(mmax, num[first_id[i]]);
        printf("%d\n", mmax);
        for (int i = 0; i < n; i++)
            if (num[first_id[i]] == mmax)
                puts(all[i]);
    }
    return 0;
}

上面的略慢，再快一点的，Query循环里稍改下

#include <algorithm>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <map>

using namespace std;

#define MAXLEN 1000010  /* capacity of the text buffer `str` */
#define MAXTRIE 500010  /* capacity of the node pool and of the BFS queue */
#define WORDLEN 71      /* capacity of one keyword, terminator included */
#define KIND 26         /* alphabet size; characters map to (c - 'a') */
#define N 201           /* maximum number of keywords per test case */

char str[MAXLEN], keyword[WORDLEN];
char all[N][WORDLEN];   /* keywords in input order */
int num[N];             /* num[id]: occurrences in the text of the keyword
                           whose trie node carries `id` */
int first_id[N];        /* first_id[i]: id stored in the trie node of
                           keyword i; differs from i only when keyword i
                           repeats an earlier keyword */

struct Node {
    Node *fail;         /* failure link; NULL only for the root */
    Node *out;          /* nearest node on the failure chain (excluding this
                           node and the root) where a keyword ends, or NULL */
    Node *next[KIND];   /* children of this node, indexed by (c - 'a') */
    int cnt;            /* number of inserted keywords ending here */
    int id;             /* id of the first keyword ending here, or -1 */

    /* Reset the node to an empty, unlinked state. */
    void clr()
    {
        fail = NULL;
        out = NULL;
        cnt = 0;
        id = -1;
        /* sizeof(next) is already the size of the whole array in bytes.
           Multiplying it by KIND would write far past the end of the node. */
        memset(next, 0, sizeof(next));
    }
} Trie[MAXTRIE], *q[MAXTRIE], *root, *cur;
/* Trie: node pool.  q: BFS queue used by Build_AC().
   root: Trie[0].    cur: most recently allocated node of the pool. */

int head, tail;         /* front / back index of q; both 0 after Init_AC() */

/*
 * Insert keyword `s` with input index `id` into the trie.
 *
 * If the same keyword was inserted before, the node keeps the id of the
 * first insertion and first_id[id] records it, so that every copy can later
 * report the shared occurrence count.
 *
 * Precondition: every character of `s` is in 'a'..'z', 0 <= id < N, and the
 * node pool has room left (no capacity check is made on `cur`).
 */
void Insert(const char s[], int id)
{
    Node *p = root;
    for (size_t i = 0, n = strlen(s); i < n; i++) {
        int idx = s[i] - 'a';
        if (p->next[idx] == NULL) {
            /* Allocate only when the edge is missing, so that keywords
               sharing a prefix ("he", "her") share one branch. */
            p->next[idx] = ++cur;
            p->next[idx]->clr();
        }
        p = p->next[idx];
    }
    p->cnt++;
    if (p->id == -1)
        p->id = id;
    first_id[id] = p->id;
}

/*
 * Compute the failure link and the output link of every node by a
 * breadth-first traversal starting at the root.
 *
 * Failure link: children of the root fail to the root; any other node fails
 * to the deepest node reachable through its parent's failure chain that has
 * an edge with the same character, or to the root if none exists.
 *
 * Output link: the failure target itself when a keyword ends there,
 * otherwise the failure target's own output link. A failure target is
 * always shallower than the node, so its links were set earlier in the
 * traversal.
 */
void Build_AC()
{
    root->fail = NULL;
    root->out = NULL;
    q[tail++] = root;
    while (head != tail) {
        Node *p = q[head++];
        for (int i = 0; i < KIND; i++) {
            Node *child = p->next[i];
            if (child == NULL)
                continue;
            q[tail++] = child;

            /* For p == root this starts at NULL, so child->fail = root. */
            Node *tmp = p->fail;
            while (tmp != NULL && tmp->next[i] == NULL)
                tmp = tmp->fail;
            child->fail = (tmp != NULL) ? tmp->next[i] : root;

            Node *f = child->fail;
            child->out = (f != root && f->cnt > 0) ? f : f->out;
        }
    }
}

/*
 * Scan `str`, adding one to num[id] for every occurrence of every keyword.
 * Returns the total number of keyword occurrences, with duplicate keywords
 * counted once per copy.
 *
 * Precondition: every character of `str` is in 'a'..'z'.
 */
int Query()
{
    int ans = 0;
    Node *p = root;
    for (size_t i = 0, n = strlen(str); i < n; i++) {
        int idx = str[i] - 'a';

        /* Follow failure links until some state has an edge for this
           character, or until we are back at the root. */
        while (p->next[idx] == NULL && p != root)
            p = p->fail;
        p = p->next[idx];
        if (p == NULL)      /* the root has no such edge either */
            p = root;

        /* Visit only the nodes where a keyword ends: p itself if it is one,
           then the chain of output links. This reports the same matches as
           walking the whole failure chain and testing cnt on each node, but
           skips the non-terminal nodes. */
        Node *hit = (p != root && p->cnt > 0) ? p : p->out;
        for (; hit != NULL; hit = hit->out) {
            ans += hit->cnt;
            num[hit->id]++;
        }
    }
    return ans;
}

/* Reset the automaton to a single empty root node, empty the BFS queue and
   clear the per-keyword occurrence counters. */
void Init_AC()
{
    cur = root = Trie;
    root->clr();
    head = tail = 0;
    memset(num, 0, sizeof(num));
}

/*
 * For each test case: read n keywords and a text, then print the largest
 * occurrence count followed by every keyword reaching it, in input order.
 * Input ends at n == 0, at end of file, or on malformed input.
 */
int main()
{
    int n;
    /* n is bounded by N because all[], num[] and first_id[] hold N entries. */
    while (scanf("%d", &n) == 1 && n > 0 && n <= N) {
        Init_AC();
        for (int i = 0; i < n; i++) {
            /* Field width is WORDLEN - 1, leaving room for the terminator. */
            if (scanf("%70s", keyword) != 1)
                return 0;
            strcpy(all[i], keyword);
            Insert(keyword, i);
        }
        /* Field width is MAXLEN - 1. */
        if (scanf("%1000009s", str) != 1)
            return 0;
        Build_AC();
        Query();

        int mmax = 0;
        for (int i = 0; i < n; i++)
            mmax = max(mmax, num[first_id[i]]);
        printf("%d\n", mmax);
        for (int i = 0; i < n; i++)
            if (num[first_id[i]] == mmax)
                puts(all[i]);
    }
    return 0;
}

这篇关于AC自动机加强版 uva 1449 - Dominating Patterns的文章就介绍到这儿，希望我们推荐的文章对编程师们有所帮助！