7 Essential String Matching Algorithms in C
String Matching Algorithms
String Matching is a fundamental problem in computer science with applications in text processing, bioinformatics, data mining, and more. These algorithms help find patterns within larger texts efficiently.
| # | Algorithm | Time Complexity | Best For | Resources |
|---|---|---|---|---|
| 1 | Naive (Brute Force) Algorithm | O(n*m) | Small texts, simple patterns | GeeksforGeeks |
| 2 | Knuth-Morris-Pratt (KMP) Algorithm | O(n+m) | Texts with repeating patterns | GeeksforGeeks |
| 3 | Boyer-Moore Algorithm | O(n/m) best, O(n*m) worst | English text, large alphabets | GeeksforGeeks |
| 4 | Rabin-Karp Algorithm | O(n+m) avg, O(n*m) worst | Multiple pattern search | GeeksforGeeks |
| 5 | Z Algorithm | O(n+m) | Prefix matching, linear time | GeeksforGeeks |
| 6 | Trie Algorithm | O(m) search, O(m) insert | Dictionary search, prefix matching | GeeksforGeeks |
| 7 | Suffix Tree Algorithm | O(n) build, O(m) search | Complex pattern matching | GeeksforGeeks |
Algorithm Implementations in C
1. Naive (Brute Force) Algorithm
The simplest approach that checks all possible positions in the text for the pattern.
#include <stdio.h>
#include <string.h>
void naiveSearch(char* text, char* pattern) {
int n = strlen(text);
int m = strlen(pattern);
for (int i = 0; i <= n - m; i++) {
int j;
for (j = 0; j < m; j++) {
if (text[i + j] != pattern[j])
break;
}
if (j == m)
printf("Pattern found at index %d\n", i);
}
}
int main() {
char text[] = "AABAACAADAABAAABAA";
char pattern[] = "AABA";
naiveSearch(text, pattern);
return 0;
}
2. Knuth-Morris-Pratt (KMP) Algorithm
Uses a prefix table (LPS array) to avoid unnecessary comparisons.
#include <stdio.h>
#include <string.h>
void computeLPS(char* pattern, int m, int* lps) {
int len = 0;
lps[0] = 0;
int i = 1;
while (i < m) {
if (pattern[i] == pattern[len]) {
len++;
lps[i] = len;
i++;
} else {
if (len != 0) {
len = lps[len - 1];
} else {
lps[i] = 0;
i++;
}
}
}
}
void KMPSearch(char* text, char* pattern) {
int n = strlen(text);
int m = strlen(pattern);
int lps[m];
computeLPS(pattern, m, lps);
int i = 0, j = 0;
while (i < n) {
if (pattern[j] == text[i]) {
i++;
j++;
}
if (j == m) {
printf("Pattern found at index %d\n", i - j);
j = lps[j - 1];
} else if (i < n && pattern[j] != text[i]) {
if (j != 0)
j = lps[j - 1];
else
i++;
}
}
}
int main() {
char text[] = "ABABDABACDABABCABAB";
char pattern[] = "ABABCABAB";
KMPSearch(text, pattern);
return 0;
}
3. Boyer-Moore Algorithm
Uses bad character heuristic and good suffix heuristic for efficient searching.
#include <stdio.h>
#include <string.h>
#include <limits.h>
#define NO_OF_CHARS 256
int max(int a, int b) { return (a > b) ? a : b; }
void badCharHeuristic(char* str, int size, int badchar[NO_OF_CHARS]) {
for (int i = 0; i < NO_OF_CHARS; i++)
badchar[i] = -1;
for (int i = 0; i < size; i++)
badchar[(int)str[i]] = i;
}
void boyerMooreSearch(char* text, char* pattern) {
int m = strlen(pattern);
int n = strlen(text);
int badchar[NO_OF_CHARS];
badCharHeuristic(pattern, m, badchar);
int s = 0;
while (s <= (n - m)) {
int j = m - 1;
while (j >= 0 && pattern[j] == text[s + j])
j--;
if (j < 0) {
printf("Pattern found at index %d\n", s);
s += (s + m < n) ? m - badchar[text[s + m]] : 1;
} else {
s += max(1, j - badchar[text[s + j]]);
}
}
}
int main() {
char text[] = "ABAAABCD";
char pattern[] = "ABC";
boyerMooreSearch(text, pattern);
return 0;
}
4. Rabin-Karp Algorithm
Uses hashing to find patterns, efficient for multiple pattern searches.
#include <stdio.h>
#include <string.h>
#define d 256
void rabinKarpSearch(char* text, char* pattern, int q) {
int m = strlen(pattern);
int n = strlen(text);
int i, j;
int p = 0; // hash value for pattern
int t = 0; // hash value for text
int h = 1;
for (i = 0; i < m - 1; i++)
h = (h * d) % q;
for (i = 0; i < m; i++) {
p = (d * p + pattern[i]) % q;
t = (d * t + text[i]) % q;
}
for (i = 0; i <= n - m; i++) {
if (p == t) {
for (j = 0; j < m; j++) {
if (text[i + j] != pattern[j])
break;
}
if (j == m)
printf("Pattern found at index %d\n", i);
}
if (i < n - m) {
t = (d * (t - text[i] * h) + text[i + m]) % q;
if (t < 0)
t = (t + q);
}
}
}
int main() {
char text[] = "GEEKS FOR GEEKS";
char pattern[] = "GEEK";
int q = 101; // A prime number
rabinKarpSearch(text, pattern, q);
return 0;
}
Algorithm Performance Comparison
| Algorithm | Best Case | Average Case | Worst Case | Space Complexity |
|---|---|---|---|---|
| Naive | O(n) | O(n*m) | O(n*m) | O(1) |
| KMP | O(n+m) | O(n+m) | O(n+m) | O(m) |
| Boyer-Moore | O(n/m) | O(n+m) | O(n*m) | O(k) (alphabet size) |
| Rabin-Karp | O(n+m) | O(n+m) | O(n*m) | O(1) |
| Z Algorithm | O(n+m) | O(n+m) | O(n+m) | O(n+m) |
| Trie | O(m) | O(m) | O(m) | O(m*k) (k = alphabet size) |
Related Algorithm Resources