7 Essential String Matching Algorithms in C

String Matching Algorithms

String Matching is a fundamental problem in computer science with applications in text processing, bioinformatics, data mining, and more. These algorithms help find patterns within larger texts efficiently.
# Algorithm Time Complexity Best For Resources
1 Naive (Brute Force) Algorithm O(n*m) Small texts, simple patterns GeeksforGeeks
2 Knuth-Morris-Pratt (KMP) Algorithm O(n+m) Texts with repeating patterns GeeksforGeeks
3 Boyer-Moore Algorithm O(n/m) best, O(n*m) worst English text, large alphabets GeeksforGeeks
4 Rabin-Karp Algorithm O(n+m) avg, O(n*m) worst Multiple pattern search GeeksforGeeks
5 Z Algorithm O(n+m) Prefix matching, linear time GeeksforGeeks
6 Trie Algorithm O(m) search, O(m) insert Dictionary search, prefix matching GeeksforGeeks
7 Suffix Tree Algorithm O(n) build, O(m) search Complex pattern matching GeeksforGeeks

Algorithm Implementations in C

1. Naive (Brute Force) Algorithm

The simplest approach that checks all possible positions in the text for the pattern.

#include <stdio.h>
#include <string.h>

void naiveSearch(char* text, char* pattern) {
    int n = strlen(text);
    int m = strlen(pattern);
    
    for (int i = 0; i <= n - m; i++) {
        int j;
        for (j = 0; j < m; j++) {
            if (text[i + j] != pattern[j])
                break;
        }
        if (j == m)
            printf("Pattern found at index %d\n", i);
    }
}

int main() {
    char text[] = "AABAACAADAABAAABAA";
    char pattern[] = "AABA";
    naiveSearch(text, pattern);
    return 0;
}

2. Knuth-Morris-Pratt (KMP) Algorithm

Uses a prefix table (LPS array) to avoid unnecessary comparisons.

#include <stdio.h>
#include <string.h>

void computeLPS(char* pattern, int m, int* lps) {
    int len = 0;
    lps[0] = 0;
    
    int i = 1;
    while (i < m) {
        if (pattern[i] == pattern[len]) {
            len++;
            lps[i] = len;
            i++;
        } else {
            if (len != 0) {
                len = lps[len - 1];
            } else {
                lps[i] = 0;
                i++;
            }
        }
    }
}

void KMPSearch(char* text, char* pattern) {
    int n = strlen(text);
    int m = strlen(pattern);
    
    int lps[m];
    computeLPS(pattern, m, lps);
    
    int i = 0, j = 0;
    while (i < n) {
        if (pattern[j] == text[i]) {
            i++;
            j++;
        }
        if (j == m) {
            printf("Pattern found at index %d\n", i - j);
            j = lps[j - 1];
        } else if (i < n && pattern[j] != text[i]) {
            if (j != 0)
                j = lps[j - 1];
            else
                i++;
        }
    }
}

int main() {
    char text[] = "ABABDABACDABABCABAB";
    char pattern[] = "ABABCABAB";
    KMPSearch(text, pattern);
    return 0;
}

3. Boyer-Moore Algorithm

Uses bad character heuristic and good suffix heuristic for efficient searching.

#include <stdio.h>
#include <string.h>
#include <limits.h>

#define NO_OF_CHARS 256

int max(int a, int b) { return (a > b) ? a : b; }

void badCharHeuristic(char* str, int size, int badchar[NO_OF_CHARS]) {
    for (int i = 0; i < NO_OF_CHARS; i++)
        badchar[i] = -1;
    for (int i = 0; i < size; i++)
        badchar[(int)str[i]] = i;
}

void boyerMooreSearch(char* text, char* pattern) {
    int m = strlen(pattern);
    int n = strlen(text);
    
    int badchar[NO_OF_CHARS];
    badCharHeuristic(pattern, m, badchar);
    
    int s = 0;
    while (s <= (n - m)) {
        int j = m - 1;
        while (j >= 0 && pattern[j] == text[s + j])
            j--;
        if (j < 0) {
            printf("Pattern found at index %d\n", s);
            s += (s + m < n) ? m - badchar[text[s + m]] : 1;
        } else {
            s += max(1, j - badchar[text[s + j]]);
        }
    }
}

int main() {
    char text[] = "ABAAABCD";
    char pattern[] = "ABC";
    boyerMooreSearch(text, pattern);
    return 0;
}

4. Rabin-Karp Algorithm

Uses hashing to find patterns, efficient for multiple pattern searches.

#include <stdio.h>
#include <string.h>

#define d 256

void rabinKarpSearch(char* text, char* pattern, int q) {
    int m = strlen(pattern);
    int n = strlen(text);
    int i, j;
    int p = 0; // hash value for pattern
    int t = 0; // hash value for text
    int h = 1;
    
    for (i = 0; i < m - 1; i++)
        h = (h * d) % q;
    
    for (i = 0; i < m; i++) {
        p = (d * p + pattern[i]) % q;
        t = (d * t + text[i]) % q;
    }
    
    for (i = 0; i <= n - m; i++) {
        if (p == t) {
            for (j = 0; j < m; j++) {
                if (text[i + j] != pattern[j])
                    break;
            }
            if (j == m)
                printf("Pattern found at index %d\n", i);
        }
        
        if (i < n - m) {
            t = (d * (t - text[i] * h) + text[i + m]) % q;
            if (t < 0)
                t = (t + q);
        }
    }
}

int main() {
    char text[] = "GEEKS FOR GEEKS";
    char pattern[] = "GEEK";
    int q = 101; // A prime number
    rabinKarpSearch(text, pattern, q);
    return 0;
}

Algorithm Performance Comparison

Algorithm Best Case Average Case Worst Case Space Complexity
Naive O(n) O(n*m) O(n*m) O(1)
KMP O(n+m) O(n+m) O(n+m) O(m)
Boyer-Moore O(n/m) O(n+m) O(n*m) O(k) (alphabet size)
Rabin-Karp O(n+m) O(n+m) O(n*m) O(1)
Z Algorithm O(n+m) O(n+m) O(n+m) O(n+m)
Trie O(m) O(m) O(m) O(m*k) (k = alphabet size)