diff --git a/.gitignore b/.gitignore index 16fad05..95dd74e 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,4 @@ /decoded.txt /src/decoded.txt /.DS_Store +/src/lib/ diff --git a/README.md b/README.md index 408ea4a..3b6e50b 100644 --- a/README.md +++ b/README.md @@ -25,11 +25,11 @@ Average Time: 1291.0 It is found that 4-ary heap is the fastest. So later encoding is done using 4-ary heap. -* encoder.java: Produces code_table.txt containing codes for all the unique messages; and encoded.bin, which is the binary file for the codes corresponding to the input file. +* Encoder.java: Produces code_table.txt containing codes for all the unique messages; and encoded.bin, which is the binary file for the codes corresponding to the input file. Usage: ``` -~:src$ java encoder ../sample_input_large.txt +~:src$ java Encoder ../sample_input_large.txt Reading input file ... Building Freq Table ... Building huffman tree... Done. @@ -39,11 +39,11 @@ Generating encoded.bin .. Done. As a proof of concept, we will reconstruct our input using code_table.txt and encoded.bin -* decoder.java: Takes in input encoded.bin and code_table.txt and produces decoded.txt which is exactly the same as sample_input_large.txt as used in encoder. +* Decoder.java: Takes in input encoded.bin and code_table.txt and produces decoded.txt which is exactly the same as sample_input_large.txt as used in Encoder. Usage: ``` -~:src$ java decoder encoded.bin code_table.txt +~:src$ java Decoder encoded.bin code_table.txt Building huffman tree from code_table.txt .. Reading code_table.txt .. Done. Reading encoded.bin .. Done. diff --git a/src/BinaryHeap.java b/src/BinaryHeap.java index dcc39f6..d1bd965 100644 --- a/src/BinaryHeap.java +++ b/src/BinaryHeap.java @@ -1,17 +1,15 @@ class BinaryHeap { private Node[] data; - public int heap_size; - - public void stub(){ - System.out.println("Reached stub func"); - } + private int heap_size; /** Constructor **/ public BinaryHeap(int num_nodes){ data = new Node[num_nodes]; heap_size = 0; } + + public int getSize() { return heap_size; } public boolean is_empty() { return (heap_size == 0); @@ -46,8 +44,7 @@ public Node del_min(){ if (is_empty()) return null; else{ - Node min_node = new Node(0,-1); - min_node = data[0]; + Node min_node = data[0]; data[0] = data[heap_size-1]; heap_size--; if (heap_size>0) @@ -57,41 +54,44 @@ public Node del_min(){ } private void manage_heap_upwards(int node_index){ - int parent_index; - Node temp = new Node(0,-1); - if (node_index!=0){ - parent_index = get_parent_index(node_index); - if (data[parent_index].get_freq() >= data[node_index].get_freq()){ - temp = data[parent_index]; + while (node_index != 0) { + int parent_index = get_parent_index(node_index); + if (data[parent_index].get_freq() > data[node_index].get_freq()){ + Node temp = data[parent_index]; data[parent_index] = data[node_index]; data[node_index] = temp; - manage_heap_upwards(parent_index); + node_index = parent_index; + } else { + break; } } } private void manage_heap_downwards(int node_index){ - int l_index,r_index,min_index; - Node temp = new Node(0,-1); - l_index = get_left_child_index(node_index); - r_index = get_right_child_index(node_index); - if (r_index>=heap_size){ - if (l_index>=heap_size) - return; - else - min_index = l_index; - } - else { - if (data[l_index].get_freq() <= data[r_index].get_freq()) - min_index = l_index; - else - min_index = r_index; - } - if (data[node_index].get_freq() > data[min_index].get_freq()){ - temp = data[min_index]; - data[min_index] = data[node_index]; - data[node_index] = temp; - manage_heap_downwards(min_index); + while (true) { + int l_index = get_left_child_index(node_index); + int r_index = get_right_child_index(node_index); + int min_index; + if (r_index>=heap_size){ + if (l_index>=heap_size) + break; + else + min_index = l_index; + } + else { + if (data[l_index].get_freq() <= data[r_index].get_freq()) + min_index = l_index; + else + min_index = r_index; + } + if (data[node_index].get_freq() > data[min_index].get_freq()){ + Node temp = data[min_index]; + data[min_index] = data[node_index]; + data[node_index] = temp; + node_index = min_index; + } else { + break; + } } } diff --git a/src/BinaryHeapTest.java b/src/BinaryHeapTest.java new file mode 100644 index 0000000..90c7ffd --- /dev/null +++ b/src/BinaryHeapTest.java @@ -0,0 +1,107 @@ +import org.junit.jupiter.api.Test; +import static org.junit.jupiter.api.Assertions.*; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Random; + +class BinaryHeapTest { + + @Test + void testInsertAndDelMin() { + BinaryHeap heap = new BinaryHeap(10); + int[] freqs = {5, 3, 8, 1, 4}; + for (int f : freqs) { + heap.insert(new Node(f, f)); + } + int[] expected = {1, 3, 4, 5, 8}; + for (int e : expected) { + Node n = heap.del_min(); + assertNotNull(n); + assertEquals(e, n.get_freq()); + } + } + + @Test + void testIsEmpty() { + BinaryHeap heap = new BinaryHeap(10); + assertTrue(heap.is_empty()); + heap.insert(new Node(5, 0)); + assertFalse(heap.is_empty()); + heap.del_min(); + assertTrue(heap.is_empty()); + } + + @Test + void testSingleElement() { + BinaryHeap heap = new BinaryHeap(10); + heap.insert(new Node(42, 0)); + assertFalse(heap.is_empty()); + Node n = heap.del_min(); + assertNotNull(n); + assertEquals(42, n.get_freq()); + assertTrue(heap.is_empty()); + } + + @Test + void testEqualFrequencies() { + BinaryHeap heap = new BinaryHeap(10); + heap.insert(new Node(7, 0)); + heap.insert(new Node(7, 1)); + heap.insert(new Node(7, 2)); + List msgs = new ArrayList<>(); + while (!heap.is_empty()) { + Node n = heap.del_min(); + assertNotNull(n); + assertEquals(7, n.get_freq()); + msgs.add(n.get_msg()); + } + assertEquals(3, msgs.size()); + Collections.sort(msgs); + assertEquals(List.of(0, 1, 2), msgs); + } + + @Test + void testGetRoot() { + BinaryHeap heap = new BinaryHeap(10); + heap.insert(new Node(5, 0)); + heap.insert(new Node(3, 1)); + heap.insert(new Node(8, 2)); + Node root = heap.get_root(); + assertNotNull(root); + assertEquals(3, root.get_freq()); + // get_root should not remove the element + assertEquals(3, heap.getSize()); + } + + @Test + void testDelMinOnEmpty() { + BinaryHeap heap = new BinaryHeap(10); + assertNull(heap.del_min()); + } + + @Test + void testGetRootOnEmpty() { + BinaryHeap heap = new BinaryHeap(10); + assertNull(heap.get_root()); + } + + @Test + void testManyElements() { + Random rng = new Random(12345); + int n = 100; + BinaryHeap heap = new BinaryHeap(n); + for (int i = 0; i < n; i++) { + heap.insert(new Node(rng.nextInt(10000), i)); + } + int prev = -1; + for (int i = 0; i < n; i++) { + Node node = heap.del_min(); + assertNotNull(node); + assertTrue(node.get_freq() >= prev, "Extraction order must be non-decreasing"); + prev = node.get_freq(); + } + assertTrue(heap.is_empty()); + } +} diff --git a/src/D_aryHeap.java b/src/D_aryHeap.java index a2e35dd..58d89cc 100644 --- a/src/D_aryHeap.java +++ b/src/D_aryHeap.java @@ -1,12 +1,8 @@ class D_aryHeap { - public void stub(){ - System.out.println("Reached stub func"); - } - private Node[] data; private int d; - public int heap_size; + private int heap_size; /** Constructor **/ public D_aryHeap(int num_nodes, int d){ @@ -14,6 +10,8 @@ public D_aryHeap(int num_nodes, int d){ this.d = d; this.heap_size = 3; } + + public int getSize() { return heap_size; } public boolean is_empty() { return (heap_size == 3); @@ -40,8 +38,7 @@ public Node del_min(){ if (is_empty()) return null; else{ - Node min_node = new Node(0,-1); - min_node = data[3]; + Node min_node = data[3]; data[3] = data[heap_size-1]; heap_size--; if (heap_size>3) @@ -51,29 +48,29 @@ public Node del_min(){ } private void manage_heap_upwards(int node_index) { - int parent_index; - Node temp = new Node(0,-1); - if (node_index!=3){ - parent_index = get_parent_index(node_index); - if (data[parent_index].get_freq() >= data[node_index].get_freq()){ - temp = data[parent_index]; + while (node_index != 3) { + int parent_index = get_parent_index(node_index); + if (data[parent_index].get_freq() > data[node_index].get_freq()){ + Node temp = data[parent_index]; data[parent_index] = data[node_index]; data[node_index] = temp; - manage_heap_upwards(parent_index); + node_index = parent_index; + } else { + break; } } } private void manage_heap_downwards(int node_index){ - Node temp = new Node(0,-1); - int min_index; - if (get_k_child_index(node_index, 1)= data[min_index].get_freq()){ - temp = data[min_index]; + while (get_k_child_index(node_index, 1) < heap_size) { + int min_index = get_min_child_index(node_index); + if (data[node_index].get_freq() > data[min_index].get_freq()){ + Node temp = data[min_index]; data[min_index] = data[node_index]; data[node_index] = temp; - manage_heap_downwards(min_index); + node_index = min_index; + } else { + break; } } } diff --git a/src/D_aryHeapTest.java b/src/D_aryHeapTest.java new file mode 100644 index 0000000..a7c2710 --- /dev/null +++ b/src/D_aryHeapTest.java @@ -0,0 +1,132 @@ +import org.junit.jupiter.api.Test; +import static org.junit.jupiter.api.Assertions.*; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Random; + +class D_aryHeapTest { + + @Test + void testInsertAndDelMinWith4Way() { + D_aryHeap heap = new D_aryHeap(10, 4); + int[] freqs = {5, 3, 8, 1, 4}; + for (int f : freqs) { + heap.insert(new Node(f, f)); + } + int[] expected = {1, 3, 4, 5, 8}; + for (int e : expected) { + Node n = heap.del_min(); + assertNotNull(n); + assertEquals(e, n.get_freq()); + } + } + + @Test + void testIsEmpty() { + D_aryHeap heap = new D_aryHeap(10, 4); + assertTrue(heap.is_empty()); + heap.insert(new Node(5, 0)); + assertFalse(heap.is_empty()); + heap.del_min(); + assertTrue(heap.is_empty()); + } + + @Test + void testSingleElement() { + D_aryHeap heap = new D_aryHeap(10, 4); + heap.insert(new Node(42, 0)); + assertFalse(heap.is_empty()); + Node n = heap.del_min(); + assertNotNull(n); + assertEquals(42, n.get_freq()); + assertTrue(heap.is_empty()); + } + + @Test + void testEqualFrequencies() { + D_aryHeap heap = new D_aryHeap(10, 4); + heap.insert(new Node(7, 0)); + heap.insert(new Node(7, 1)); + heap.insert(new Node(7, 2)); + List msgs = new ArrayList<>(); + while (!heap.is_empty()) { + Node n = heap.del_min(); + assertNotNull(n); + assertEquals(7, n.get_freq()); + msgs.add(n.get_msg()); + } + assertEquals(3, msgs.size()); + Collections.sort(msgs); + assertEquals(List.of(0, 1, 2), msgs); + } + + @Test + void testGetRoot() { + D_aryHeap heap = new D_aryHeap(10, 4); + heap.insert(new Node(5, 0)); + heap.insert(new Node(3, 1)); + heap.insert(new Node(8, 2)); + Node root = heap.get_root(); + assertNotNull(root); + assertEquals(3, root.get_freq()); + // getSize should still include all 3 elements (offset 3 + 3 = 6) + assertEquals(6, heap.getSize()); + } + + @Test + void testDelMinOnEmpty() { + D_aryHeap heap = new D_aryHeap(10, 4); + assertNull(heap.del_min()); + } + + @Test + void testWith2Way() { + D_aryHeap heap = new D_aryHeap(10, 2); + int[] freqs = {5, 3, 8, 1, 4}; + for (int f : freqs) { + heap.insert(new Node(f, f)); + } + int[] expected = {1, 3, 4, 5, 8}; + for (int e : expected) { + Node n = heap.del_min(); + assertNotNull(n); + assertEquals(e, n.get_freq()); + } + } + + @Test + void testWith8Way() { + D_aryHeap heap = new D_aryHeap(20, 8); + int[] freqs = {10, 3, 7, 1, 15, 2, 9, 4, 12, 6}; + for (int f : freqs) { + heap.insert(new Node(f, f)); + } + int prev = -1; + while (!heap.is_empty()) { + Node n = heap.del_min(); + assertNotNull(n); + assertTrue(n.get_freq() >= prev); + prev = n.get_freq(); + } + } + + @Test + void testManyElements() { + Random rng = new Random(54321); + int n = 100; + D_aryHeap heap = new D_aryHeap(n, 4); + for (int i = 0; i < n; i++) { + heap.insert(new Node(rng.nextInt(10000), i)); + } + int prev = -1; + for (int i = 0; i < n; i++) { + Node node = heap.del_min(); + assertNotNull(node); + assertTrue(node.get_freq() >= prev, "Extraction order must be non-decreasing"); + prev = node.get_freq(); + } + assertTrue(heap.is_empty()); + } +} diff --git a/src/Decoder.java b/src/Decoder.java new file mode 100644 index 0000000..f7f5331 --- /dev/null +++ b/src/Decoder.java @@ -0,0 +1,124 @@ +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileReader; +import java.io.FileWriter; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Paths; + +public class Decoder { + + public static void add_node(Node huff, int msg, String code){ + if (code.length()==1){ + Node leaf = new Node(0,msg); + if (code.charAt(0)=='0') + huff.set_left(leaf); + else + huff.set_right(leaf); + return; + } + else { + if (code.charAt(0)=='0') { + if (huff.get_left()==null) + huff.set_left(new Node(0,-1)); + add_node(huff.get_left(),msg,code.substring(1)); + } + else { + if (huff.get_right()==null) + huff.set_right(new Node(0,-1)); + add_node(huff.get_right(), msg, code.substring(1)); + } + } + + } + + public static int build_huffTree_using_codeTable_txt(Node huff, String file) throws IOException { + int totalSymbols = 0; + System.out.print("Reading code_table.txt .. "); + File input = new File(file); + try (BufferedReader br = new BufferedReader(new FileReader(input))) { + String line; + boolean firstLine = true; + while ((line = br.readLine())!=null){ + if (firstLine) { + totalSymbols = Integer.parseInt(line.trim()); + firstLine = false; + continue; + } + String[] parts = line.split(" "); + int msg = Integer.parseInt(parts[0]); + String code = parts[1]; + add_node(huff,msg,code); + } + } + return totalSymbols; + } + + public static void main(String[] args) throws IOException { + + if (args.length < 2){ + System.out.println("Please check .. not all inputs provided"); + System.out.println("Inputs:"); + System.out.println(" -args[0]: encoded.bin"); + System.out.println(" -args[1]: code_table.txt"); + return; + } + + System.out.print("Building huffman tree from code_table.txt .. \n"); + Node huffman_tree = new Node(0,-1); + int totalSymbols = build_huffTree_using_codeTable_txt(huffman_tree, args[1]); + System.out.print("Done.\n"); + + System.out.print("Reading encoded.bin .. "); + byte[] code_bin = Files.readAllBytes(Paths.get(args[0])); + System.out.print("Done.\n"); + + System.out.print("Generating decoded.txt .. ");; + File decoded = new File("decoded.txt"); + BufferedWriter output = new BufferedWriter(new FileWriter(decoded)); + Node curr = huffman_tree; + int symbolsDecoded = 0; + + // Handle single-symbol case: tree root is internal with leaf as left child only + boolean singleSymbol = (huffman_tree.get_left() != null && huffman_tree.get_right() == null + && huffman_tree.get_left().get_msg() != -1); + + for (int i=0;i>j) & 0x1; + if (next_bit==0) { + output.write(huffman_tree.get_left().get_msg()+"\n"); + symbolsDecoded++; + } + j++; + } else { + if (curr.get_msg()!=-1){ + output.write(curr.get_msg()+"\n"); + symbolsDecoded++; + curr = huffman_tree; + } + else{ + int next_bit = (b>>j) & 0x1; + if (next_bit==1) + curr = curr.get_right(); + else + curr = curr.get_left(); + j++; + } + } + } + } + // Handle case where last symbol ends exactly at the last bit + if (!singleSymbol && curr.get_msg()!=-1 && symbolsDecoded < totalSymbols){ + output.write(curr.get_msg()+"\n"); + symbolsDecoded++; + } + output.close(); + System.out.println("Done.\n"); + } +} diff --git a/src/EdgeCaseTest.java b/src/EdgeCaseTest.java new file mode 100644 index 0000000..7efdefb --- /dev/null +++ b/src/EdgeCaseTest.java @@ -0,0 +1,113 @@ +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import static org.junit.jupiter.api.Assertions.*; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileReader; +import java.io.FileWriter; +import java.io.IOException; +import java.io.PrintStream; +import java.util.ArrayList; +import java.util.List; + +class EdgeCaseTest { + + private final PrintStream originalOut = System.out; + private final PrintStream originalErr = System.err; + private ByteArrayOutputStream capturedOut; + + @BeforeEach + void setUp() { + capturedOut = new ByteArrayOutputStream(); + System.setOut(new PrintStream(capturedOut)); + } + + @AfterEach + void tearDown() { + System.setOut(originalOut); + System.setErr(originalErr); + new File("code_table.txt").delete(); + new File("encoded.bin").delete(); + new File("decoded.txt").delete(); + new File("test_edge_input.txt").delete(); + } + + private void writeInputFile(String filename, String content) throws IOException { + try (BufferedWriter bw = new BufferedWriter(new FileWriter(filename))) { + bw.write(content); + } + } + + private List readOutputFile(String filename) throws IOException { + List result = new ArrayList<>(); + try (BufferedReader br = new BufferedReader(new FileReader(filename))) { + String line; + while ((line = br.readLine()) != null) { + if (!line.isEmpty()) { + result.add(Integer.parseInt(line.trim())); + } + } + } + return result; + } + + @Test + void testEmptyInput() throws Exception { + writeInputFile("test_edge_input.txt", ""); + // Should not throw - should print a message about empty input + Encoder.main(new String[]{"test_edge_input.txt"}); + String output = capturedOut.toString(); + assertTrue(output.toLowerCase().contains("empty") || output.toLowerCase().contains("no"), + "Empty input should produce a message about empty/no data"); + // encoded.bin should not be created + assertFalse(new File("encoded.bin").exists() && new File("encoded.bin").length() > 0, + "Empty input should not produce meaningful encoded output"); + } + + @Test + void testSingleLineInput() throws Exception { + writeInputFile("test_edge_input.txt", "42\n"); + Encoder.main(new String[]{"test_edge_input.txt"}); + assertTrue(new File("code_table.txt").exists()); + assertTrue(new File("encoded.bin").exists()); + Decoder.main(new String[]{"encoded.bin", "code_table.txt"}); + assertTrue(new File("decoded.txt").exists()); + List decoded = readOutputFile("decoded.txt"); + assertEquals(1, decoded.size()); + assertEquals(42, decoded.get(0)); + } + + @Test + void testLargeNumbers() throws Exception { + String content = "999999\n888888\n999999\n777777\n888888\n"; + writeInputFile("test_edge_input.txt", content); + Encoder.main(new String[]{"test_edge_input.txt"}); + Decoder.main(new String[]{"encoded.bin", "code_table.txt"}); + List decoded = readOutputFile("decoded.txt"); + int[] expected = {999999, 888888, 999999, 777777, 888888}; + assertEquals(expected.length, decoded.size()); + for (int i = 0; i < expected.length; i++) { + assertEquals(expected[i], decoded.get(i)); + } + } + + @Test + void testAllSameSymbol() throws Exception { + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < 100; i++) { + sb.append("7\n"); + } + writeInputFile("test_edge_input.txt", sb.toString()); + Encoder.main(new String[]{"test_edge_input.txt"}); + Decoder.main(new String[]{"encoded.bin", "code_table.txt"}); + List decoded = readOutputFile("decoded.txt"); + assertEquals(100, decoded.size()); + for (int val : decoded) { + assertEquals(7, val); + } + } +} diff --git a/src/encoder.java b/src/Encoder.java similarity index 50% rename from src/encoder.java rename to src/Encoder.java index d6b28da..8c2516a 100644 --- a/src/encoder.java +++ b/src/Encoder.java @@ -10,17 +10,26 @@ import java.util.BitSet; import java.util.Collections; -public class encoder { +public class Encoder { - private static void fillNwrite_code_table(Node huffman_tree, String code, String[] code_table, BufferedWriter output) throws IOException { + private static void fillNwrite_code_table(Node huffman_tree, StringBuilder code, String[] code_table, BufferedWriter output) throws IOException { if (huffman_tree.get_msg() != -1){ - code_table[huffman_tree.get_msg()] = code; - output.write(huffman_tree.get_msg()+" "+code+"\n"); + if (code.length() == 0) code.append('0'); + code_table[huffman_tree.get_msg()] = code.toString(); + output.write(huffman_tree.get_msg()+" "+code.toString()+"\n"); return; } else{ - fillNwrite_code_table(huffman_tree.get_left(),code+"0",code_table,output); - fillNwrite_code_table(huffman_tree.get_right(),code+"1",code_table,output); + if (huffman_tree.get_left()!=null) { + code.append('0'); + fillNwrite_code_table(huffman_tree.get_left(),code,code_table,output); + code.setLength(code.length() - 1); + } + if (huffman_tree.get_right()!=null) { + code.append('1'); + fillNwrite_code_table(huffman_tree.get_right(),code,code_table,output); + code.setLength(code.length() - 1); + } } } @@ -30,16 +39,22 @@ private static byte[] get_binary_data(ArrayList freq_data, String[] cod for (int i=0;i freq_data = new ArrayList<>(); - while ((line = reader.readLine())!=null) { - if (!line.equals("")) { - freq_data.add(Integer.parseInt(line.toString())); + try (BufferedReader reader = new BufferedReader(new FileReader(input_file))) { + System.out.println("Reading input file ... "); + String line; + int lineNum = 0; + while ((line = reader.readLine())!=null) { + lineNum++; + if (!line.equals("")) { + try { + int val = Integer.parseInt(line.trim()); + if (val < 0) { + System.out.println("Error: Negative value " + val + " at line " + lineNum + ". Only non-negative integers are supported."); + return; + } + freq_data.add(val); + } catch (NumberFormatException e) { + System.out.println("Error: Invalid integer \"" + line + "\" at line " + lineNum + ". Each line must contain a non-negative integer."); + return; + } + } } } + if (freq_data.isEmpty()) { + System.out.println("Input file is empty or contains no valid data."); + return; + } + int[] freq_table = Gen_huffman_code.build_freq_table(freq_data); System.out.print("\nBuilding huffman tree... "); @@ -72,16 +105,11 @@ public static void main (String[] args) throws InterruptedException, IOException Arrays.fill(code_table, "-1"); File code_table_out = new File("code_table.txt"); BufferedWriter output = new BufferedWriter(new FileWriter(code_table_out)); - fillNwrite_code_table(huffman_tree,"",code_table,output); + output.write(freq_data.size()+"\n"); + fillNwrite_code_table(huffman_tree,new StringBuilder(),code_table,output); output.close(); System.out.print("Done.\n"); - /** - for(int i=0;i "+code_table[i]); - **/ - File encoded_out = new File("encoded.bin"); FileOutputStream fos = new FileOutputStream(encoded_out); System.out.print("Generating encoded.bin .. "); @@ -90,4 +118,4 @@ public static void main (String[] args) throws InterruptedException, IOException fos.close(); } -} \ No newline at end of file +} diff --git a/src/Gen_huffman_code.java b/src/Gen_huffman_code.java index 1c99280..81d6291 100644 --- a/src/Gen_huffman_code.java +++ b/src/Gen_huffman_code.java @@ -9,15 +9,17 @@ public class Gen_huffman_code { public static int count; - public static void gen_codes(Node n, String code){ - //System.out.println("Curr Node Freq: "+n.get_freq()); + public static void gen_codes(Node n, StringBuilder code){ if (n.get_msg() != -1){ - //System.out.println(n.get_msg()+"==>"+code); return; } else{ - gen_codes(n.get_left(),code+"0"); - gen_codes(n.get_right(),code+"1"); + code.append('0'); + gen_codes(n.get_left(), code); + code.setLength(code.length() - 1); + code.append('1'); + gen_codes(n.get_right(), code); + code.setLength(code.length() - 1); } } @@ -32,22 +34,25 @@ public static Node build_tree_using_4way_heap(int[] freq_table) { } } + /** Handle single-symbol case **/ + if (min_4Wayheap.getSize() == 4) { + Node leaf = min_4Wayheap.get_root(); + Node internal = new Node(leaf.get_freq(), -1); + internal.set_left(leaf); + leaf.set_parent(internal); + return internal; + } + /** Now we have a min heap of trees having single nodes only ** ** Now we will combine all to build a single huffman tree **/ - while (min_4Wayheap.heap_size != 4){ + while (min_4Wayheap.getSize() != 4){ Node internal = new Node(0,-1); internal.set_left(min_4Wayheap.del_min());internal.get_left().set_parent(internal); internal.set_right(min_4Wayheap.del_min());internal.get_right().set_parent(internal); internal.set_freq(internal.get_left().get_freq()+internal.get_right().get_freq()); - /** - System.out.println(internal.get_left().get_freq()+ - ":"+internal.get_left().get_msg()+ - "|"+internal.get_right().get_freq()+ - ":"+internal.get_right().get_msg()); - **/ min_4Wayheap.insert(internal); } - String code = ""; + StringBuilder code = new StringBuilder(); gen_codes(min_4Wayheap.get_root(),code); return min_4Wayheap.get_root(); } @@ -63,6 +68,15 @@ public static Node build_tree_using_pairing_heap(int[] freq_table) { } } + /** Handle single-symbol case **/ + if (min_pairingheap.get_root().get_left()==null) { + Node leaf = min_pairingheap.get_root().get_node(); + Node internal = new Node(leaf.get_freq(), -1); + internal.set_left(leaf); + leaf.set_parent(internal); + return internal; + } + /** Now we have a min pairing heap of trees having single nodes only ** ** Now we will combine all to build a single huffman tree **/ while(min_pairingheap.get_root().get_left()!=null){ @@ -70,15 +84,9 @@ public static Node build_tree_using_pairing_heap(int[] freq_table) { internal.set_left(min_pairingheap.del_min());internal.get_left().set_parent(internal); internal.set_right(min_pairingheap.del_min());internal.get_right().set_parent(internal); internal.set_freq(internal.get_left().get_freq()+internal.get_right().get_freq()); - /** - System.out.println(internal.get_left().get_freq()+ - ":"+internal.get_left().get_msg()+ - "|"+internal.get_right().get_freq()+ - ":"+internal.get_right().get_msg()); - **/ min_pairingheap.insert(internal); } - String code = ""; + StringBuilder code = new StringBuilder(); gen_codes(min_pairingheap.get_root().get_node(),code); return min_pairingheap.get_root().get_node(); } @@ -94,27 +102,34 @@ public static Node build_tree_using_binary_heap(int[] freq_table) { } } + /** Handle single-symbol case **/ + if (min_heap.getSize() == 1) { + Node leaf = min_heap.get_root(); + Node internal = new Node(leaf.get_freq(), -1); + internal.set_left(leaf); + leaf.set_parent(internal); + return internal; + } + /** Now we have a min heap of trees having single nodes only ** ** Now we will combine all to build a single huffman tree **/ - while (min_heap.heap_size != 1){ + while (min_heap.getSize() != 1){ Node internal = new Node(0,-1); internal.set_left(min_heap.del_min());internal.get_left().set_parent(internal); internal.set_right(min_heap.del_min());internal.get_right().set_parent(internal); internal.set_freq(internal.get_left().get_freq()+internal.get_right().get_freq()); - /** - System.out.println(internal.get_left().get_freq()+ - ":"+internal.get_left().get_msg()+ - "|"+internal.get_right().get_freq()+ - ":"+internal.get_right().get_msg()); - **/ min_heap.insert(internal); } - String code = ""; + StringBuilder code = new StringBuilder(); gen_codes(min_heap.get_root(),code); return min_heap.get_root(); } public static int[] build_freq_table(ArrayList freq_data){ + count = 0; + if (freq_data.isEmpty()) { + return new int[0]; + } int[] freq_table = new int[Collections.max(freq_data)+1]; Arrays.fill(freq_table, 0); System.out.print("Building Freq Table ... "); @@ -125,7 +140,6 @@ public static int[] build_freq_table(ArrayList freq_data){ return freq_table; } - @SuppressWarnings({ "resource", "unused" }) public static void main (String[] args) throws InterruptedException, IOException { /**Read the input file and build the frequency table**/ @@ -135,13 +149,33 @@ public static void main (String[] args) throws InterruptedException, IOException return; } input_file = new File(args[0]); - BufferedReader reader = new BufferedReader(new FileReader(input_file)); - System.out.print("Reading input file ... "); - String line; ArrayList freq_data = new ArrayList<>(); - while ((line = reader.readLine())!=null) - if (!line.equals("")) - freq_data.add(Integer.parseInt(line.toString())); + try (BufferedReader reader = new BufferedReader(new FileReader(input_file))) { + System.out.print("Reading input file ... "); + String line; + int lineNum = 0; + while ((line = reader.readLine())!=null) { + lineNum++; + if (!line.equals("")) { + try { + int val = Integer.parseInt(line.trim()); + if (val < 0) { + System.out.println("Error: Negative value " + val + " at line " + lineNum + ". Only non-negative integers are supported."); + return; + } + freq_data.add(val); + } catch (NumberFormatException e) { + System.out.println("Error: Invalid integer \"" + line + "\" at line " + lineNum + ". Each line must contain a non-negative integer."); + return; + } + } + } + } + + if (freq_data.isEmpty()) { + System.out.println("Input file is empty or contains no valid data."); + return; + } int[] freq_table = build_freq_table(freq_data); System.out.print("Done.\n"); diff --git a/src/HuffmanTreeTest.java b/src/HuffmanTreeTest.java new file mode 100644 index 0000000..fb28489 --- /dev/null +++ b/src/HuffmanTreeTest.java @@ -0,0 +1,164 @@ +import org.junit.jupiter.api.Test; +import static org.junit.jupiter.api.Assertions.*; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Map; + +class HuffmanTreeTest { + + /** + * Traverse the Huffman tree and collect leaf node codes into a map. + * msg -> binary code string + */ + private static void collectCodes(Node node, StringBuilder code, Map codeMap) { + if (node == null) return; + if (node.get_msg() != -1) { + // Leaf node + String c = code.length() == 0 ? "0" : code.toString(); + codeMap.put(node.get_msg(), c); + return; + } + code.append('0'); + collectCodes(node.get_left(), code, codeMap); + code.setLength(code.length() - 1); + code.append('1'); + collectCodes(node.get_right(), code, codeMap); + code.setLength(code.length() - 1); + } + + private static Map getCodes(Node root) { + Map codeMap = new HashMap<>(); + collectCodes(root, new StringBuilder(), codeMap); + return codeMap; + } + + /** + * Build a frequency table from an array of symbols, setting Gen_huffman_code.count. + */ + private static int[] buildFreqTable(int[] symbols) { + ArrayList freqData = new ArrayList<>(); + for (int s : symbols) { + freqData.add(s); + } + return Gen_huffman_code.build_freq_table(freqData); + } + + /** + * Check that no code is a prefix of another code (prefix-free property). + */ + private static void assertPrefixFree(Map codeMap) { + String[] codes = codeMap.values().toArray(new String[0]); + for (int i = 0; i < codes.length; i++) { + for (int j = 0; j < codes.length; j++) { + if (i != j) { + assertFalse(codes[j].startsWith(codes[i]), + "Code \"" + codes[i] + "\" is a prefix of \"" + codes[j] + "\""); + } + } + } + } + + @Test + void testBinaryHeapTreePrefixFree() { + int[] symbols = {0, 0, 0, 0, 1, 1, 2, 3, 3, 3}; + int[] freqTable = buildFreqTable(symbols); + Node root = Gen_huffman_code.build_tree_using_binary_heap(freqTable); + Map codes = getCodes(root); + assertEquals(4, codes.size()); + assertPrefixFree(codes); + } + + @Test + void testPairingHeapTreePrefixFree() { + int[] symbols = {0, 0, 0, 0, 1, 1, 2, 3, 3, 3}; + int[] freqTable = buildFreqTable(symbols); + Node root = Gen_huffman_code.build_tree_using_pairing_heap(freqTable); + Map codes = getCodes(root); + assertEquals(4, codes.size()); + assertPrefixFree(codes); + } + + @Test + void test4WayHeapTreePrefixFree() { + int[] symbols = {0, 0, 0, 0, 1, 1, 2, 3, 3, 3}; + int[] freqTable = buildFreqTable(symbols); + Node root = Gen_huffman_code.build_tree_using_4way_heap(freqTable); + Map codes = getCodes(root); + assertEquals(4, codes.size()); + assertPrefixFree(codes); + } + + @Test + void testMoreFrequentShorterCodes() { + // Symbol 0 appears 10 times, symbol 1 appears 5, symbol 2 appears 1 + int[] symbols = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2}; + int[] freqTable = buildFreqTable(symbols); + Node root = Gen_huffman_code.build_tree_using_binary_heap(freqTable); + Map codes = getCodes(root); + + // Symbol 0 (freq 10) should have code length <= symbol 2 (freq 1) + assertTrue(codes.get(0).length() <= codes.get(2).length(), + "More frequent symbol should have shorter or equal code"); + // Symbol 1 (freq 5) should have code length <= symbol 2 (freq 1) + assertTrue(codes.get(1).length() <= codes.get(2).length(), + "More frequent symbol should have shorter or equal code"); + } + + @Test + void testSingleSymbolTree() { + int[] symbols = {5, 5, 5}; + int[] freqTable = buildFreqTable(symbols); + Node root = Gen_huffman_code.build_tree_using_binary_heap(freqTable); + assertNotNull(root); + // Root should be internal (msg == -1) + assertEquals(-1, root.get_msg()); + // Left child should be the leaf + assertNotNull(root.get_left()); + assertEquals(5, root.get_left().get_msg()); + // Right child should be null for single-symbol tree + assertNull(root.get_right()); + // Code for symbol 5 should be "0" + Map codes = getCodes(root); + assertEquals(1, codes.size()); + assertEquals("0", codes.get(5)); + } + + @Test + void testTwoSymbolTree() { + int[] symbols = {1, 2, 1, 2}; + int[] freqTable = buildFreqTable(symbols); + Node root = Gen_huffman_code.build_tree_using_binary_heap(freqTable); + Map codes = getCodes(root); + assertEquals(2, codes.size()); + // Each should get a 1-bit code + for (String code : codes.values()) { + assertEquals(1, code.length(), "Two-symbol tree should produce 1-bit codes"); + } + } + + @Test + void testBuildFreqTable() { + ArrayList freqData = new ArrayList<>(); + freqData.add(0); + freqData.add(2245); + freqData.add(0); + freqData.add(999999); + int[] table = Gen_huffman_code.build_freq_table(freqData); + // table should be at least size 1000000 + assertTrue(table.length >= 1000000); + assertEquals(2, table[0]); + assertEquals(1, table[2245]); + assertEquals(1, table[999999]); + // count should be 3 unique symbols + assertEquals(3, Gen_huffman_code.count); + } + + @Test + void testBuildFreqTableEmpty() { + ArrayList freqData = new ArrayList<>(); + int[] table = Gen_huffman_code.build_freq_table(freqData); + assertEquals(0, table.length); + assertEquals(0, Gen_huffman_code.count); + } +} diff --git a/src/Makefile b/src/Makefile index 43a8c52..d3e0edb 100644 --- a/src/Makefile +++ b/src/Makefile @@ -1,4 +1,6 @@ -all: Node.class PairNode.class BinaryHeap.class PairingHeap.class D_aryHeap.class Gen_huffman_code.class encoder.class decoder.class +JUNIT_JAR = lib/junit-platform-console-standalone.jar + +all: Node.class PairNode.class BinaryHeap.class PairingHeap.class D_aryHeap.class Gen_huffman_code.class Encoder.class Decoder.class Node.class: Node.java javac -d . -classpath . Node.java @@ -18,12 +20,33 @@ D_aryHeap.class: D_aryHeap.java Gen_huffman_code.class: Gen_huffman_code.java javac -d . -classpath . Gen_huffman_code.java -encoder.class: encoder.java - javac -d . -classpath . encoder.java +Encoder.class: Encoder.java + javac -d . -classpath . Encoder.java + +Decoder.class: Decoder.java + javac -d . -classpath . Decoder.java + +test: all BinaryHeapTest.class D_aryHeapTest.class PairingHeapTest.class HuffmanTreeTest.class RoundtripTest.class EdgeCaseTest.class + java -jar $(JUNIT_JAR) --class-path . --scan-classpath --disable-banner + +BinaryHeapTest.class: BinaryHeapTest.java + javac -d . -classpath .:$(JUNIT_JAR) BinaryHeapTest.java + +D_aryHeapTest.class: D_aryHeapTest.java + javac -d . -classpath .:$(JUNIT_JAR) D_aryHeapTest.java + +PairingHeapTest.class: PairingHeapTest.java + javac -d . -classpath .:$(JUNIT_JAR) PairingHeapTest.java + +HuffmanTreeTest.class: HuffmanTreeTest.java + javac -d . -classpath .:$(JUNIT_JAR) HuffmanTreeTest.java + +RoundtripTest.class: RoundtripTest.java + javac -d . -classpath .:$(JUNIT_JAR) RoundtripTest.java -decoder.class: decoder.java - javac -d . -classpath . decoder.java +EdgeCaseTest.class: EdgeCaseTest.java + javac -d . -classpath .:$(JUNIT_JAR) EdgeCaseTest.java clean: rm -rf ./*.class - rm encoded.bin decoded.txt code_table.txt + rm -f encoded.bin decoded.txt code_table.txt test_input.txt test_edge_input.txt diff --git a/src/PairingHeap.java b/src/PairingHeap.java index 358db5e..50ae956 100644 --- a/src/PairingHeap.java +++ b/src/PairingHeap.java @@ -1,9 +1,5 @@ class PairingHeap { - public void stub(){ - System.out.println("Reached stub func"); - } - private PairNode root; public PairingHeap(){ @@ -33,20 +29,17 @@ public Node del_min(){ if (is_empty()) return null; else{ - Node x = new Node(0,-1); - x = root.get_node(); + Node x = root.get_node(); if (root.get_left()==null) { root = null; return x; } root = root.get_left(); - PairNode next = new PairNode(new Node(0,-1)); - next = root.get_next(); + PairNode next = root.get_next(); root.set_next(null); root.set_prev(null); while(next!=null){ - PairNode temp = new PairNode(new Node(0,-1)); - temp = next; + PairNode temp = next; next = next.get_next(); temp.set_next(null); temp.set_prev(null); diff --git a/src/PairingHeapTest.java b/src/PairingHeapTest.java new file mode 100644 index 0000000..e917142 --- /dev/null +++ b/src/PairingHeapTest.java @@ -0,0 +1,105 @@ +import org.junit.jupiter.api.Test; +import static org.junit.jupiter.api.Assertions.*; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Random; + +class PairingHeapTest { + + @Test + void testInsertAndDelMin() { + PairingHeap heap = new PairingHeap(); + int[] freqs = {5, 3, 8, 1, 4}; + for (int f : freqs) { + heap.insert(new Node(f, f)); + } + int[] expected = {1, 3, 4, 5, 8}; + for (int e : expected) { + Node n = heap.del_min(); + assertNotNull(n); + assertEquals(e, n.get_freq()); + } + } + + @Test + void testIsEmpty() { + PairingHeap heap = new PairingHeap(); + assertTrue(heap.is_empty()); + heap.insert(new Node(5, 0)); + assertFalse(heap.is_empty()); + heap.del_min(); + assertTrue(heap.is_empty()); + } + + @Test + void testSingleElement() { + PairingHeap heap = new PairingHeap(); + heap.insert(new Node(42, 0)); + assertFalse(heap.is_empty()); + Node n = heap.del_min(); + assertNotNull(n); + assertEquals(42, n.get_freq()); + assertTrue(heap.is_empty()); + } + + @Test + void testEqualFrequencies() { + PairingHeap heap = new PairingHeap(); + heap.insert(new Node(7, 0)); + heap.insert(new Node(7, 1)); + heap.insert(new Node(7, 2)); + List msgs = new ArrayList<>(); + while (!heap.is_empty()) { + Node n = heap.del_min(); + assertNotNull(n); + assertEquals(7, n.get_freq()); + msgs.add(n.get_msg()); + } + assertEquals(3, msgs.size()); + Collections.sort(msgs); + assertEquals(List.of(0, 1, 2), msgs); + } + + @Test + void testGetRoot() { + PairingHeap heap = new PairingHeap(); + heap.insert(new Node(5, 0)); + heap.insert(new Node(3, 1)); + heap.insert(new Node(8, 2)); + PairNode root = heap.get_root(); + assertNotNull(root); + assertEquals(3, root.get_freq()); + } + + @Test + void testDelMinOnEmpty() { + PairingHeap heap = new PairingHeap(); + assertNull(heap.del_min()); + } + + @Test + void testGetRootOnEmpty() { + PairingHeap heap = new PairingHeap(); + assertNull(heap.get_root()); + } + + @Test + void testManyElements() { + Random rng = new Random(99999); + int n = 100; + PairingHeap heap = new PairingHeap(); + for (int i = 0; i < n; i++) { + heap.insert(new Node(rng.nextInt(10000), i)); + } + int prev = -1; + for (int i = 0; i < n; i++) { + Node node = heap.del_min(); + assertNotNull(node); + assertTrue(node.get_freq() >= prev, "Extraction order must be non-decreasing"); + prev = node.get_freq(); + } + assertTrue(heap.is_empty()); + } +} diff --git a/src/RoundtripTest.java b/src/RoundtripTest.java new file mode 100644 index 0000000..dc5c129 --- /dev/null +++ b/src/RoundtripTest.java @@ -0,0 +1,103 @@ +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import static org.junit.jupiter.api.Assertions.*; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileReader; +import java.io.FileWriter; +import java.io.IOException; +import java.io.ByteArrayOutputStream; +import java.io.PrintStream; +import java.util.ArrayList; +import java.util.List; + +class RoundtripTest { + + private final PrintStream originalOut = System.out; + private final PrintStream originalErr = System.err; + + @BeforeEach + void setUp() { + // Suppress stdout during encoder/decoder runs + System.setOut(new PrintStream(new ByteArrayOutputStream())); + } + + @AfterEach + void tearDown() { + System.setOut(originalOut); + System.setErr(originalErr); + // Clean up generated files + new File("code_table.txt").delete(); + new File("encoded.bin").delete(); + new File("decoded.txt").delete(); + new File("test_input.txt").delete(); + } + + private void writeInputFile(String filename, int[] values) throws IOException { + try (BufferedWriter bw = new BufferedWriter(new FileWriter(filename))) { + for (int v : values) { + bw.write(v + "\n"); + } + } + } + + private List readOutputFile(String filename) throws IOException { + List result = new ArrayList<>(); + try (BufferedReader br = new BufferedReader(new FileReader(filename))) { + String line; + while ((line = br.readLine()) != null) { + if (!line.isEmpty()) { + result.add(Integer.parseInt(line.trim())); + } + } + } + return result; + } + + private void runRoundtrip(int[] input) throws Exception { + writeInputFile("test_input.txt", input); + Encoder.main(new String[]{"test_input.txt"}); + assertTrue(new File("code_table.txt").exists(), "code_table.txt should be created"); + assertTrue(new File("encoded.bin").exists(), "encoded.bin should be created"); + Decoder.main(new String[]{"encoded.bin", "code_table.txt"}); + assertTrue(new File("decoded.txt").exists(), "decoded.txt should be created"); + List decoded = readOutputFile("decoded.txt"); + assertEquals(input.length, decoded.size(), "Decoded output should have same number of symbols"); + for (int i = 0; i < input.length; i++) { + assertEquals(input[i], decoded.get(i), "Mismatch at position " + i); + } + } + + @Test + void testSmallInputRoundtrip() throws Exception { + int[] input = {0, 2245, 0, 999999, 2245, 0, 0, 2245, 2245, 34, 446, 34, 446, 34, 999999, 2}; + runRoundtrip(input); + } + + @Test + void testTwoSymbolRoundtrip() throws Exception { + int[] input = {1, 2, 1, 2}; + runRoundtrip(input); + } + + @Test + void testSingleSymbolRoundtrip() throws Exception { + int[] input = {5, 5, 5}; + runRoundtrip(input); + } + + @Test + void testManySymbolsRoundtrip() throws Exception { + // Generate input with ~20 different symbols + int[] symbols = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 10, 50, 100, 200, 500, 1000, 5000, 10000, 50000, 99999}; + int[] input = new int[60]; + for (int i = 0; i < input.length; i++) { + input[i] = symbols[i % symbols.length]; + } + runRoundtrip(input); + } +} diff --git a/src/decoder.java b/src/decoder.java deleted file mode 100644 index c254554..0000000 --- a/src/decoder.java +++ /dev/null @@ -1,96 +0,0 @@ -import java.io.BufferedReader; -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileReader; -import java.io.FileWriter; -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Paths; - -public class decoder { - - public static void add_node(Node huff, int msg, String code){ - if (code.length()==1){ - Node leaf = new Node(0,msg); - if (code.charAt(0)=='0') - huff.set_left(leaf); - else - huff.set_right(leaf); - return; - } - else { - if (code.charAt(0)=='0') { - if (huff.get_left()==null) - huff.set_left(new Node(0,-1)); - add_node(huff.get_left(),msg,code.substring(1)); - } - else { - if (huff.get_right()==null) - huff.set_right(new Node(0,-1)); - add_node(huff.get_right(), msg, code.substring(1)); - } - } - - } - - public static Node build_huffTree_using_codeTable_txt(String file) throws IOException { - Node huff = new Node(0,-1); - System.out.print("Reading code_table.txt .. "); - File input = new File(file); - BufferedReader br = new BufferedReader(new FileReader(input)); - String line; - while ((line = br.readLine())!=null){ - int msg = Integer.parseInt(line.split(" ")[0]); - String code = line.split(" ")[1]; - add_node(huff,msg,code); - } - br.close(); - return huff; - } - - public static void main(String[] args) throws IOException { - - if (args.length < 2){ - System.out.println("Please check .. not all inputs provided"); - System.out.println("Inputs:"); - System.out.println(" -args[0]: encoded.bin"); - System.out.println(" -args[1]: code_table.txt"); - return; - } - - System.out.print("Building huffman tree from code_table.txt .. \n"); - Node huffman_tree = build_huffTree_using_codeTable_txt(args[1]); - System.out.print("Done.\n"); - - System.out.print("Reading encoded.bin .. "); - byte[] code_bin = Files.readAllBytes(Paths.get(args[0])); - System.out.print("Done.\n"); - - System.out.print("Generating decoded.txt .. ");; - File decoded = new File("decoded.txt"); - BufferedWriter output = new BufferedWriter(new FileWriter(decoded)); - Node curr = huffman_tree; - for (int i=0;i>j) & 0x1; - if (next_bit==1) - curr = curr.get_right(); - else - curr = curr.get_left(); - j++; - } - } - if (i==code_bin.length-1 && j==8) - output.write(curr.get_msg()+"\n"); - } - output.close(); - System.out.println("Done.\n"); - } -}