/**
 * @file
 * @brief Huffman coding built with the greedy strategy and a binary min-heap.
 *
 * @details
 * Huffman coding is a lossless compression scheme that gives every symbol a
 * prefix-free bit string; the more frequent a symbol is, the shorter its code.
 * The tree is built greedily: the two least frequent nodes are repeatedly
 * removed from a min-heap and merged under a new parent whose frequency is
 * their sum, until a single root remains.
 *
 * Functions in this file:
 *
 * - createNode():       allocates a tree node.
 * - createMinHeap():    allocates an empty min-heap of a given capacity.
 * - swapNode():         swaps two heap slots.
 * - minHeapify():       sifts one heap entry down to restore heap order.
 * - extractMin():       removes and returns the least frequent node.
 * - insertMinHeap():    adds a node to the heap.
 * - isSizeOne():        tells whether exactly one node is left in the heap.
 * - buildHuffmanTree(): runs the greedy merge and returns the tree root.
 * - printCodes():       walks the tree and prints the code of every symbol.
 *
 * Worked example for the frequencies used in main()
 * (a=5, b=9, c=12, d=13, e=16, f=45):
 *
 *   1.  5 +  9 -> 14     heap: 12 13 14 16 45
 *   2. 12 + 13 -> 25     heap: 14 16 25 45
 *   3. 14 + 16 -> 30     heap: 25 30 45
 *   4. 25 + 30 -> 55     heap: 45 55
 *   5. 45 + 55 -> 100    heap: 100 (the root)
 *
 * Resulting tree (edge label = emitted bit):
 *
 *   [100]
 *    +-0- [45] f
 *    +-1- [55]
 *          +-0- [25]
 *          |     +-0- [12] c
 *          |     +-1- [13] d
 *          +-1- [30]
 *                +-0- [14]
 *                |     +-0- [5] a
 *                |     +-1- [9] b
 *                +-1- [16] e
 *
 * Program output for that example:
 *
 *   f: 0
 *   c: 100
 *   d: 101
 *   a: 1100
 *   b: 1101
 *   e: 111
 *
 * @author Ryan John Mathew: https://github.com/ryanzone
 */

#include <stdio.h>
#include <stdlib.h>

/** Size of the scratch buffer that holds one code while the tree is walked. */
#define MAX 256

/**
 * @brief Node of the Huffman tree.
 *
 * Leaves carry an input symbol; internal nodes carry the placeholder '$' and
 * the summed frequency of their two children.
 */
typedef struct Node
{
    char data;                 /**< symbol (meaningful for leaves only) */
    unsigned freq;             /**< frequency of the symbol / subtree */
    struct Node *left, *right; /**< children, both NULL for a leaf */
} Node;

/**
 * @brief Array-backed binary min-heap of tree nodes, ordered by frequency.
 */
typedef struct
{
    unsigned size;     /**< number of nodes currently stored */
    unsigned capacity; /**< number of slots available in `array` */
    Node** array;      /**< heap storage, root at index 0 */
} MinHeap;

/**
 * @brief Allocates a leaf node.
 * @param data symbol stored in the node
 * @param freq frequency stored in the node
 * @returns the new node with both children set to NULL, or NULL when the
 * allocation fails; the caller owns the node
 */
Node* createNode(char data, unsigned freq)
{
    Node* node = malloc(sizeof *node);
    if (node == NULL)
    {
        return NULL;
    }

    node->data = data;
    node->freq = freq;
    node->left = NULL;
    node->right = NULL;
    return node;
}

/**
 * @brief Allocates an empty min-heap.
 * @param capacity maximum number of nodes the heap may hold
 * @returns the new heap, or NULL when an allocation fails; the caller owns
 * the heap
 */
MinHeap* createMinHeap(unsigned capacity)
{
    MinHeap* heap = malloc(sizeof *heap);
    if (heap == NULL)
    {
        return NULL;
    }

    heap->size = 0;
    heap->capacity = capacity;

    /* calloc checks the element-count multiplication for overflow. At least
     * one slot is requested so that a NULL result always means failure. */
    heap->array = calloc(capacity ? capacity : 1u, sizeof *heap->array);
    if (heap->array == NULL)
    {
        free(heap);
        return NULL;
    }
    return heap;
}

/**
 * @brief Swaps the two node pointers referenced by `a` and `b`.
 * @param a first slot
 * @param b second slot
 */
void swapNode(Node** a, Node** b)
{
    Node* temp = *a;
    *a = *b;
    *b = temp;
}

/**
 * @brief Restores the heap order below `idx` by sifting the entry down.
 * @param heap heap to repair
 * @param idx index of the entry that may be larger than its children;
 * negative or out-of-range indices are ignored
 */
void minHeapify(MinHeap* heap, int idx)
{
    if (heap == NULL || idx < 0)
    {
        return;
    }

    unsigned current = (unsigned)idx;

    /* Entries at index >= size / 2 are leaves. Stopping there also keeps
     * 2 * current + 1 inside the array, so the index cannot overflow. */
    while (current < heap->size / 2)
    {
        unsigned left = 2 * current + 1;
        unsigned right = left + 1;
        unsigned smallest = current;

        if (heap->array[left]->freq < heap->array[smallest]->freq)
        {
            smallest = left;
        }
        if (right < heap->size &&
            heap->array[right]->freq < heap->array[smallest]->freq)
        {
            smallest = right;
        }
        if (smallest == current)
        {
            break;
        }

        swapNode(&heap->array[smallest], &heap->array[current]);
        current = smallest;
    }
}

/**
 * @brief Removes the node with the smallest frequency from the heap.
 * @param heap heap to take the node from
 * @returns the removed node (ownership passes to the caller), or NULL when
 * the heap is NULL or empty
 */
Node* extractMin(MinHeap* heap)
{
    if (heap == NULL || heap->size == 0)
    {
        return NULL;
    }

    Node* min = heap->array[0];
    heap->size--;
    heap->array[0] = heap->array[heap->size];
    minHeapify(heap, 0);
    return min;
}

/**
 * @brief Inserts a node into the heap.
 *
 * The call is a no-op when `heap` or `node` is NULL or when the heap is
 * already full; in that case the caller still owns `node`.
 *
 * @param heap heap to insert into
 * @param node node to insert
 */
void insertMinHeap(MinHeap* heap, Node* node)
{
    if (heap == NULL || node == NULL || heap->size >= heap->capacity)
    {
        return;
    }

    unsigned i = heap->size++;

    /* Move larger ancestors down until the slot for `node` is found. */
    while (i > 0 && node->freq < heap->array[(i - 1) / 2]->freq)
    {
        heap->array[i] = heap->array[(i - 1) / 2];
        i = (i - 1) / 2;
    }

    heap->array[i] = node;
}

/**
 * @brief Tells whether the heap holds exactly one node.
 * @param heap heap to inspect
 * @returns 1 if the heap size is one, 0 otherwise
 */
int isSizeOne(MinHeap* heap) { return (heap->size == 1); }

/**
 * @brief Releases a (sub)tree, children first.
 * @param root root of the tree to free; NULL is accepted
 */
static void freeTree(Node* root)
{
    if (root == NULL)
    {
        return;
    }
    freeTree(root->left);
    freeTree(root->right);
    free(root);
}

/**
 * @brief Releases a heap together with every tree it still holds.
 * @param heap heap to free; NULL is accepted
 */
static void destroyMinHeap(MinHeap* heap)
{
    if (heap == NULL)
    {
        return;
    }
    for (unsigned i = 0; i < heap->size; ++i)
    {
        freeTree(heap->array[i]);
    }
    free(heap->array);
    free(heap);
}

/**
 * @brief Builds the Huffman tree for the given symbols.
 *
 * @param data symbols, `size` entries
 * @param freq frequency of each symbol, `size` entries; values are converted
 * to unsigned, so they are expected to be non-negative
 * @param size number of symbols
 * @returns root of the tree (owned by the caller), or NULL when `size` is not
 * positive, an argument is NULL, or memory runs out
 */
Node* buildHuffmanTree(char data[], int freq[], int size)
{
    if (data == NULL || freq == NULL || size <= 0)
    {
        return NULL;
    }

    MinHeap* heap = createMinHeap((unsigned)size);
    if (heap == NULL)
    {
        return NULL;
    }

    for (int i = 0; i < size; ++i)
    {
        Node* leaf = createNode(data[i], (unsigned)freq[i]);
        if (leaf == NULL)
        {
            destroyMinHeap(heap);
            return NULL;
        }
        heap->array[heap->size++] = leaf;
    }

    /* Bottom-up heap construction, last parent first. The index is computed
     * in signed arithmetic: for a single symbol it is -1 and the loop is
     * skipped instead of wrapping around as an unsigned value would. */
    for (int i = size / 2 - 1; i >= 0; --i)
    {
        minHeapify(heap, i);
    }

    /* Greedy step: merge the two least frequent trees until one is left. */
    while (!isSizeOne(heap))
    {
        Node* left = extractMin(heap);
        Node* right = extractMin(heap);

        Node* top = createNode('$', left->freq + right->freq);
        if (top == NULL)
        {
            freeTree(left);
            freeTree(right);
            destroyMinHeap(heap);
            return NULL;
        }
        top->left = left;
        top->right = right;

        insertMinHeap(heap, top);
    }

    Node* root = extractMin(heap);
    destroyMinHeap(heap); /* only the container is left to release */
    return root;
}

/**
 * @brief Prints the code of every symbol in the tree, one per line, in the
 * form "<symbol>: <bits>".
 *
 * A left edge contributes a 0 and a right edge a 1. A tree that consists of a
 * single leaf has no edges, so that symbol is given the one-bit code "0".
 *
 * @param root root of the (sub)tree to print; NULL prints nothing
 * @param arr scratch buffer holding the bits on the path from the tree root;
 * it must have room for the depth of the tree
 * @param top number of valid bits in `arr`, 0 for the initial call
 */
void printCodes(Node* root, int arr[], int top)
{
    if (root == NULL)
    {
        return;
    }

    if (root->left == NULL && root->right == NULL)
    {
        printf("%c: ", root->data);
        if (top == 0)
        {
            printf("0");
        }
        for (int i = 0; i < top; ++i)
        {
            printf("%d", arr[i]);
        }
        printf("\n");
        return;
    }

    if (root->left)
    {
        arr[top] = 0;
        printCodes(root->left, arr, top + 1);
    }

    if (root->right)
    {
        arr[top] = 1;
        printCodes(root->right, arr, top + 1);
    }
}

/**
 * @brief Builds and prints the Huffman codes of a fixed six-symbol example.
 * @returns 0 on success, EXIT_FAILURE when the tree cannot be built
 */
int main(void)
{
    char arr[] = {'a', 'b', 'c', 'd', 'e', 'f'};
    int freq[] = {5, 9, 12, 13, 16, 45};
    int size = sizeof(arr) / sizeof(arr[0]);

    Node* root = buildHuffmanTree(arr, freq, size);
    if (root == NULL)
    {
        fprintf(stderr, "failed to build the Huffman tree\n");
        return EXIT_FAILURE;
    }

    int codes[MAX];
    printCodes(root, codes, 0);

    freeTree(root);
    return 0;
}