Pattern matching notes for C learners

String matching algorithms are fundamental to text processing, data mining, and many other applications. This guide covers the most important algorithms with C implementations.

Naive (Brute Force) Algorithm

The simplest string matching algorithm that checks all possible positions in the text for the pattern.

Time Complexity: O(n*m) (worst case)

Space Complexity: O(1) (no extra space)

C Implementation:

#include <stdio.h>
#include <string.h>

void naiveSearch(char* text, char* pattern) {
    int n = strlen(text);
    int m = strlen(pattern);
    
    for (int i = 0; i <= n - m; i++) {
        int j;
        for (j = 0; j < m; j++) {
            if (text[i + j] != pattern[j])
                break;
        }
        if (j == m) {
            printf("Pattern found at index %d\n", i);
        }
    }
}

int main() {
    char text[] = "AABAACAADAABAAABAA";
    char pattern[] = "AABA";
    naiveSearch(text, pattern);
    return 0;
}

When to Use:

When the text and pattern are very small
When simplicity is more important than efficiency
As a baseline for comparing other algorithms

Knuth-Morris-Pratt (KMP) Algorithm

An efficient algorithm that uses information about the pattern itself to skip unnecessary comparisons.

Time Complexity: O(n+m) (linear time)

Space Complexity: O(m) (for LPS array)

C Implementation:

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

void computeLPS(char* pattern, int m, int* lps) {
    int len = 0;
    lps[0] = 0;
    int i = 1;
    
    while (i < m) {
        if (pattern[i] == pattern[len]) {
            len++;
            lps[i] = len;
            i++;
        } else {
            if (len != 0) {
                len = lps[len - 1];
            } else {
                lps[i] = 0;
                i++;
            }
        }
    }
}

void KMPSearch(char* text, char* pattern) {
    int n = strlen(text);
    int m = strlen(pattern);
    
    int* lps = (int*)malloc(m * sizeof(int));
    computeLPS(pattern, m, lps);
    
    int i = 0, j = 0;
    while (i < n) {
        if (pattern[j] == text[i]) {
            i++;
            j++;
        }
        
        if (j == m) {
            printf("Pattern found at index %d\n", i - j);
            j = lps[j - 1];
        } else if (i < n && pattern[j] != text[i]) {
            if (j != 0)
                j = lps[j - 1];
            else
                i++;
        }
    }
    free(lps);
}

int main() {
    char text[] = "ABABDABACDABABCABAB";
    char pattern[] = "ABABCABAB";
    KMPSearch(text, pattern);
    return 0;
}

Key Features:

Uses Longest Prefix Suffix (LPS) array to avoid unnecessary comparisons
Preprocesses the pattern in O(m) time
Ideal for patterns with repeating subpatterns

Boyer-Moore Algorithm

An efficient algorithm that compares from the end of the pattern and uses two heuristics to skip sections of text.

Time Complexity: O(n/m) (best case) O(n*m) (worst case)

Space Complexity: O(m + k) (k is alphabet size)

C Implementation:

#include <stdio.h>
#include <string.h>
#include <limits.h>

#define NO_OF_CHARS 256

int max(int a, int b) { return (a > b) ? a : b; }

void badCharHeuristic(char* str, int size, int badchar[NO_OF_CHARS]) {
    for (int i = 0; i < NO_OF_CHARS; i++)
        badchar[i] = -1;
    
    for (int i = 0; i < size; i++)
        badchar[(int)str[i]] = i;
}

void boyerMooreSearch(char* text, char* pattern) {
    int m = strlen(pattern);
    int n = strlen(text);
    
    int badchar[NO_OF_CHARS];
    badCharHeuristic(pattern, m, badchar);
    
    int s = 0;
    while (s <= (n - m)) {
        int j = m - 1;
        
        while (j >= 0 && pattern[j] == text[s + j])
            j--;
        
        if (j < 0) {
            printf("Pattern found at index %d\n", s);
            s += (s + m < n) ? m - badchar[text[s + m]] : 1;
        } else {
            s += max(1, j - badchar[text[s + j]]);
        }
    }
}

int main() {
    char text[] = "ABAAABCDBBABCDDEBCABC";
    char pattern[] = "ABC";
    boyerMooreSearch(text, pattern);
    return 0;
}

Key Features:

Uses Bad Character Heuristic and Good Suffix Heuristic
Often faster than KMP in practice for English text
Works best with large alphabets and small patterns

Rabin-Karp Algorithm

A pattern searching algorithm that uses hashing to find patterns in text.

Time Complexity: O(n+m) (average case) O(n*m) (worst case)

Space Complexity: O(1)

C Implementation:

#include <stdio.h>
#include <string.h>

#define d 256 // Number of characters in input alphabet

void rabinKarpSearch(char* pattern, char* text, int q) {
    int M = strlen(pattern);
    int N = strlen(text);
    int i, j;
    int p = 0; // hash value for pattern
    int t = 0; // hash value for text
    int h = 1;
    
    // Calculate h = pow(d, M-1) % q
    for (i = 0; i < M - 1; i++)
        h = (h * d) % q;
    
    // Calculate initial hash values
    for (i = 0; i < M; i++) {
        p = (d * p + pattern[i]) % q;
        t = (d * t + text[i]) % q;
    }
    
    for (i = 0; i <= N - M; i++) {
        if (p == t) {
            for (j = 0; j < M; j++) {
                if (text[i + j] != pattern[j])
                    break;
            }
            if (j == M)
                printf("Pattern found at index %d\n", i);
        }
        
        if (i < N - M) {
            t = (d * (t - text[i] * h) + text[i + M]) % q;
            if (t < 0)
                t = (t + q);
        }
    }
}

int main() {
    char text[] = "GEEKS FOR GEEKS";
    char pattern[] = "GEEK";
    int q = 101; // A prime number
    rabinKarpSearch(pattern, text, q);
    return 0;
}

Key Features:

Uses hashing to compare patterns
Can be extended to 2D pattern matching
Effective for multiple pattern search

Z Algorithm

Linear time pattern searching algorithm that constructs a Z array to find patterns.

Time Complexity: O(n+m)

Space Complexity: O(n+m)

C Implementation:

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

void getZArray(char* str, int Z[]) {
    int n = strlen(str);
    int L, R, k;
    
    L = R = 0;
    for (int i = 1; i < n; ++i) {
        if (i > R) {
            L = R = i;
            while (R < n && str[R - L] == str[R])
                R++;
            Z[i] = R - L;
            R--;
        } else {
            k = i - L;
            if (Z[k] < R - i + 1)
                Z[i] = Z[k];
            else {
                L = i;
                while (R < n && str[R - L] == str[R])
                    R++;
                Z[i] = R - L;
                R--;
            }
        }
    }
}

void ZSearch(char* text, char* pattern) {
    char concat[strlen(pattern) + strlen(text) + 2];
    strcpy(concat, pattern);
    strcat(concat, "$");
    strcat(concat, text);
    int l = strlen(concat);
    
    int* Z = (int*)malloc(l * sizeof(int));
    getZArray(concat, Z);
    
    for (int i = 0; i < l; ++i) {
        if (Z[i] == strlen(pattern))
            printf("Pattern found at index %d\n", i - strlen(pattern) - 1);
    }
    free(Z);
}

int main() {
    char text[] = "GEEKS FOR GEEKS";
    char pattern[] = "GEEK";
    ZSearch(text, pattern);
    return 0;
}

Key Features:

Constructs Z array in linear time
Similar to KMP but uses a different approach
Useful for finding all occurrences of a pattern

Trie Algorithm

A tree-like data structure that stores strings for efficient pattern matching and prefix searches.

Time Complexity: O(m) (insert/search per string)

Space Complexity: O(ALPHABET_SIZE * m * N) (N = number of keys)

C Implementation:

#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>

#define ALPHABET_SIZE 26

struct TrieNode {
    struct TrieNode* children[ALPHABET_SIZE];
    bool isEndOfWord;
};

struct TrieNode* createNode() {
    struct TrieNode* node = (struct TrieNode*)malloc(sizeof(struct TrieNode));
    node->isEndOfWord = false;
    
    for (int i = 0; i < ALPHABET_SIZE; i++)
        node->children[i] = NULL;
    
    return node;
}

void insert(struct TrieNode* root, char* key) {
    struct TrieNode* current = root;
    
    for (int i = 0; i < strlen(key); i++) {
        int index = key[i] - 'a';
        if (!current->children[index])
            current->children[index] = createNode();
        current = current->children[index];
    }
    current->isEndOfWord = true;
}

bool search(struct TrieNode* root, char* key) {
    struct TrieNode* current = root;
    
    for (int i = 0; i < strlen(key); i++) {
        int index = key[i] - 'a';
        if (!current->children[index])
            return false;
        current = current->children[index];
    }
    return (current != NULL && current->isEndOfWord);
}

int main() {
    struct TrieNode* root = createNode();
    
    insert(root, "hello");
    insert(root, "world");
    insert(root, "trie");
    insert(root, "data");
    
    printf("Search 'hello': %s\n", search(root, "hello") ? "Found" : "Not Found");
    printf("Search 'world': %s\n", search(root, "world") ? "Found" : "Not Found");
    printf("Search 'algorithm': %s\n", search(root, "algorithm") ? "Found" : "Not Found");
    
    return 0;
}

Key Features:

Excellent for dictionary implementations
Efficient prefix searches (autocomplete)
Can be memory intensive for large datasets

Algorithm Comparison

Algorithm	Best Case	Worst Case	Space	When to Use
Naive	O(n)	O(n*m)	O(1)	Small inputs, simple implementation
KMP	O(n)	O(n+m)	O(m)	Patterns with repeating subpatterns
Boyer-Moore	O(n/m)	O(n*m)	O(m + k)	Large alphabets, English text
Rabin-Karp	O(n+m)	O(n*m)	O(1)	Multiple pattern search, 2D matching
Z Algorithm	O(n+m)	O(n+m)	O(n+m)	All occurrences of pattern
Trie	O(m)	O(m)	O(ALPHABET_SIZEmN)	Dictionary, prefix searches

STRING MATCHING ALGORITHMS IN C

Naive (Brute Force) Algorithm

C Implementation:

When to Use:

Knuth-Morris-Pratt (KMP) Algorithm

C Implementation:

Key Features:

Boyer-Moore Algorithm

C Implementation:

Key Features:

Rabin-Karp Algorithm

C Implementation:

Key Features:

Z Algorithm

C Implementation:

Key Features:

Trie Algorithm

C Implementation:

Key Features:

Algorithm Comparison