Pattern Matching Algorithms in C

Introduction to Pattern Matching

Pattern matching algorithms are fundamental to computer science and are widely used in text editors, search engines, bioinformatics, and data mining. These algorithms help find the occurrence of a pattern within a larger text.

In this guide, we'll explore six important pattern matching algorithms with their implementations in C:

Naive (Brute Force) Algorithm
Knuth-Morris-Pratt (KMP) Algorithm
Boyer-Moore Algorithm
Rabin-Karp Algorithm
Z Algorithm
Trie Algorithm

1. Naive (Brute Force) Algorithm

Overview

The simplest pattern matching algorithm that checks for all possible positions of the pattern in the text.

Time Complexity

Best Case: O(n) - When the first character of the pattern is not present in text
Worst Case: O(m*(n-m+1)) - When all characters of text and pattern are same

C Implementation

#include <stdio.h>
#include <string.h>

void naiveSearch(char* text, char* pattern) {
    int M = strlen(pattern);
    int N = strlen(text);
    
    for (int i = 0; i <= N - M; i++) {
        int j;
        for (j = 0; j < M; j++) {
            if (text[i + j] != pattern[j])
                break;
        }
        if (j == M) {
            printf("Pattern found at index %d\n", i);
        }
    }
}

int main() {
    char text[] = "AABAACAADAABAAABAA";
    char pattern[] = "AABA";
    naiveSearch(text, pattern);
    return 0;
}
            

S.No	Feature	Description	Complexity
1	Implementation	Simple nested loop implementation	O(mn)
2	Space	No preprocessing needed	O(1)
3	Use Case	Small texts or patterns	-

2. Knuth-Morris-Pratt (KMP) Algorithm

Overview

An efficient algorithm that uses the concept of the longest prefix suffix (LPS) to skip unnecessary comparisons.

Time Complexity

Preprocessing: O(m)
Searching: O(n)
Total: O(m+n)

C Implementation

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

void computeLPS(char* pattern, int M, int* lps) {
    int len = 0;
    lps[0] = 0;
    int i = 1;
    
    while (i < M) {
        if (pattern[i] == pattern[len]) {
            len++;
            lps[i] = len;
            i++;
        } else {
            if (len != 0) {
                len = lps[len - 1];
            } else {
                lps[i] = 0;
                i++;
            }
        }
    }
}

void KMPSearch(char* text, char* pattern) {
    int M = strlen(pattern);
    int N = strlen(text);
    int* lps = (int*)malloc(M * sizeof(int));
    
    computeLPS(pattern, M, lps);
    
    int i = 0, j = 0;
    while (i < N) {
        if (pattern[j] == text[i]) {
            j++;
            i++;
        }
        if (j == M) {
            printf("Pattern found at index %d\n", i - j);
            j = lps[j - 1];
        } else if (i < N && pattern[j] != text[i]) {
            if (j != 0)
                j = lps[j - 1];
            else
                i++;
        }
    }
    free(lps);
}

int main() {
    char text[] = "ABABDABACDABABCABAB";
    char pattern[] = "ABABCABAB";
    KMPSearch(text, pattern);
    return 0;
}
            

S.No	Feature	Description	Complexity
1	LPS Array	Preprocesses pattern to create LPS array	O(m)
2	Searching	Efficient searching by skipping known matches	O(n)
3	Use Case	Text editors, DNA sequence analysis	-

3. Boyer-Moore Algorithm

Overview

An efficient algorithm that uses two heuristics: bad character rule and good suffix rule to skip sections of text.

Time Complexity

Best Case: O(n/m)
Worst Case: O(mn)
Average Case: O(n)

C Implementation

#include <stdio.h>
#include <string.h>
#include <limits.h>

#define NO_OF_CHARS 256

int max(int a, int b) { return (a > b) ? a : b; }

void badCharHeuristic(char* str, int size, int badchar[NO_OF_CHARS]) {
    for (int i = 0; i < NO_OF_CHARS; i++)
        badchar[i] = -1;
    for (int i = 0; i < size; i++)
        badchar[(int)str[i]] = i;
}

void searchBM(char* text, char* pattern) {
    int m = strlen(pattern);
    int n = strlen(text);
    int badchar[NO_OF_CHARS];
    
    badCharHeuristic(pattern, m, badchar);
    
    int s = 0;
    while (s <= (n - m)) {
        int j = m - 1;
        while (j >= 0 && pattern[j] == text[s + j])
            j--;
        if (j < 0) {
            printf("Pattern found at index %d\n", s);
            s += (s + m < n) ? m - badchar[text[s + m]] : 1;
        } else {
            s += max(1, j - badchar[text[s + j]]);
        }
    }
}

int main() {
    char text[] = "ABAAABCD";
    char pattern[] = "ABC";
    searchBM(text, pattern);
    return 0;
}
            

S.No	Feature	Description	Complexity
1	Bad Character Rule	Shifts pattern based on last occurrence of mismatched character	O(n/m)
2	Good Suffix Rule	Shifts pattern based on matching suffix (not shown in basic implementation)	O(n)
3	Use Case	Text editors, search engines (often fastest in practice)	-

4. Rabin-Karp Algorithm

Overview

Uses hashing to find patterns in text. Compares hash values of pattern with hash values of text windows.

Time Complexity

Average and Best Case: O(n+m)
Worst Case: O(mn) - When all windows have same hash as pattern

C Implementation

#include <stdio.h>
#include <string.h>

#define d 256 // Number of characters in input alphabet

void searchRK(char* pattern, char* text, int q) {
    int M = strlen(pattern);
    int N = strlen(text);
    int i, j;
    int p = 0; // hash value for pattern
    int t = 0; // hash value for text
    int h = 1;
    
    for (i = 0; i < M - 1; i++)
        h = (h * d) % q;
    
    for (i = 0; i < M; i++) {
        p = (d * p + pattern[i]) % q;
        t = (d * t + text[i]) % q;
    }
    
    for (i = 0; i <= N - M; i++) {
        if (p == t) {
            for (j = 0; j < M; j++) {
                if (text[i + j] != pattern[j])
                    break;
            }
            if (j == M)
                printf("Pattern found at index %d\n", i);
        }
        if (i < N - M) {
            t = (d * (t - text[i] * h) + text[i + M]) % q;
            if (t < 0)
                t = (t + q);
        }
    }
}

int main() {
    char text[] = "GEEKS FOR GEEKS";
    char pattern[] = "GEEK";
    int q = 101; // A prime number
    searchRK(pattern, text, q);
    return 0;
}
            

S.No	Feature	Description	Complexity
1	Hashing	Uses rolling hash to efficiently compute hash values	O(n+m)
2	Prime Number	Uses prime number to reduce hash collisions	-
3	Use Case	Plagiarism detection, multiple pattern search	-

5. Z Algorithm

Overview

Linear time pattern matching algorithm that constructs Z array to find all occurrences of a pattern.

Time Complexity

Preprocessing: O(n+m)
Searching: O(n+m)

C Implementation

#include <stdio.h>
#include <string.h>

void getZarr(char* str, int Z[]) {
    int n = strlen(str);
    int L, R, k;
    L = R = 0;
    
    for (int i = 1; i < n; ++i) {
        if (i > R) {
            L = R = i;
            while (R < n && str[R - L] == str[R])
                R++;
            Z[i] = R - L;
            R--;
        } else {
            k = i - L;
            if (Z[k] < R - i + 1)
                Z[i] = Z[k];
            else {
                L = i;
                while (R < n && str[R - L] == str[R])
                    R++;
                Z[i] = R - L;
                R--;
            }
        }
    }
}

void searchZ(char* text, char* pattern) {
    char concat[strlen(pattern) + strlen(text) + 2];
    strcpy(concat, pattern);
    strcat(concat, "$");
    strcat(concat, text);
    int l = strlen(concat);
    int Z[l];
    getZarr(concat, Z);
    
    for (int i = 0; i < l; ++i) {
        if (Z[i] == strlen(pattern))
            printf("Pattern found at index %d\n", i - strlen(pattern) - 1);
    }
}

int main() {
    char text[] = "GEEKS FOR GEEKS";
    char pattern[] = "GEEK";
    searchZ(text, pattern);
    return 0;
}
            

S.No	Feature	Description	Complexity
1	Z Array	Stores length of longest substring starting from i which is also a prefix	O(n+m)
2	Window Technique	Uses L and R window to optimize Z array construction	O(n)
3	Use Case	Linear time pattern matching, string compression	-

6. Trie Algorithm

Overview

A tree-like data structure that stores strings for efficient prefix searching and multiple pattern matching.

Time Complexity

Insert: O(m) per string (m is length of string)
Search: O(m) per string

C Implementation

#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>

#define ALPHABET_SIZE 26

struct TrieNode {
    struct TrieNode* children[ALPHABET_SIZE];
    bool isEndOfWord;
};

struct TrieNode* createNode() {
    struct TrieNode* node = (struct TrieNode*)malloc(sizeof(struct TrieNode));
    node->isEndOfWord = false;
    for (int i = 0; i < ALPHABET_SIZE; i++)
        node->children[i] = NULL;
    return node;
}

void insert(struct TrieNode* root, char* key) {
    struct TrieNode* current = root;
    for (int i = 0; i < strlen(key); i++) {
        int index = key[i] - 'a';
        if (!current->children[index])
            current->children[index] = createNode();
        current = current->children[index];
    }
    current->isEndOfWord = true;
}

bool search(struct TrieNode* root, char* key) {
    struct TrieNode* current = root;
    for (int i = 0; i < strlen(key); i++) {
        int index = key[i] - 'a';
        if (!current->children[index])
            return false;
        current = current->children[index];
    }
    return (current != NULL && current->isEndOfWord);
}

int main() {
    struct TrieNode* root = createNode();
    insert(root, "the");
    insert(root, "a");
    insert(root, "there");
    insert(root, "answer");
    insert(root, "any");
    
    printf("%s\n", search(root, "the") ? "Found" : "Not found");
    printf("%s\n", search(root, "these") ? "Found" : "Not found");
    return 0;
}
            

S.No	Feature	Description	Complexity
1	Prefix Search	Efficiently finds all words with given prefix	O(m)
2	Space	Can be memory intensive (O(ALPHABET_SIZE * m * n))	O(n*m)
3	Use Case	Autocomplete, spell checkers, IP routing	-

Algorithm Comparison

Algorithm	Best Case	Worst Case	Average Case	Preprocessing Time	Space
Naive	O(n)	O(mn)	O(mn)	None	O(1)
KMP	O(n)	O(n)	O(n)	O(m)	O(m)
Boyer-Moore	O(n/m)	O(mn)	O(n)	O(m + k)	O(k)
Rabin-Karp	O(n+m)	O(mn)	O(n+m)	O(m)	O(1)
Z Algorithm	O(n+m)	O(n+m)	O(n+m)	O(n+m)	O(n+m)
Trie	O(m)	O(m)	O(m)	O(mn)	O(mn)

Related Algorithm Resources

Algorithm Problems

Interview Questions

Pattern Matching MCQs

Tech Hub