This is a C++ program to perform finite state automaton based search. An automaton with a finite number of states is called a Finite Automaton. Here, a text is given text[0 … t-1] and a pattern p[0 ... p-1] is also given. We have to find the pattern in the text and print its all occurrences at the respective indices.
Algorithms
Begin
Function void transitiontable():
1) put the entries in first row and filled it up. All entries in first row are always 0 except the entry for p[0] character. Always we need to go to state 1.
for p[0].
2) Initialize longestprefixsuffix as 0.
3) for i = 1 to P. (Here P is the length of the pattern)
a) Copy the entries from the row at index equal to longestprefixsuffix.
b) Update the entry for p[i] character to i+1.
c) Update longestprefixsuffix = TF[lps][pat[i]]
where TT is the 2D array.
End
Example
#include<iostream>
#include<cstring>
#define NO_OF_CHARS 512
using namespace std;
// builds the TF table which represents Finite Automata for a given pattern
void transitiontable(char *p, int P, int TT[][NO_OF_CHARS]) {
int i, longestprefixsuffix = 0, y;
// put entries in first row
for (y =0; y < NO_OF_CHARS; y++)
TT[0][y] = 0;
TT[0][p[0]] = 1;
// put entries in other rows
for (i = 1; i<= P; i++) { // Copy values from row at index longestprefixsuffix
for (y = 0; y < NO_OF_CHARS; y++)
TT[i][y] = TT[longestprefixsuffix][y];
// Update the entry
TT[i][p[i]] = i + 1;
// Update lps for next row to be filled
if (i < P)
longestprefixsuffix = TT[longestprefixsuffix][p[i]]; // TT is the 2D array which is being constructed.
}
}
//Prints all occurrences of pattern in text
void patternsearch(char *p, char *t) {
int P = strlen(p);
int T = strlen(t);
int TT[P+1][NO_OF_CHARS];
transitiontable(p, P, TT);
// process text over FA.
int i, j=0;
for (i = 0; i < T; i++) {
j = TT[j][t[i]];
if (j == P) {
cout<<"\n pattern is found at index: "<< i-P+1;
}
}
}
int main() {
char *text = "AABAA ABBAACCDD CCDDAABAA"; //take the text as input
char *pattern = "AABAA"; //take the pattern as input
patternsearch(pattern, text);
getchar();
return 0;
}
Output
pattern is found at index: 0
pattern is found at index: 20