font-model/test_unit.py at main · Create-Inc/font-model · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
#!/usr/bin/env python3
"""
Unit tests for font-model components.

Tests pure functions and training code paths without cloud infrastructure.
Run with: python -m pytest test_unit.py -v

These tests use a tiny synthetic dataset (3 classes; 10 training and 3 test
images per class) and run 1 training epoch per mode to validate correctness.
"""

import json
import os
import shutil
import tempfile

import numpy as np
import pytest
import torch


# ---------------------------------------------------------------------------
# Test: extract_family label parser
# ---------------------------------------------------------------------------

class TestExtractFamily:
    """Checks the family name that ``extract_family`` returns for a font label."""

    def setup_method(self):
        # Deferred import: resolved when each test runs, not at module import.
        from confusion_matrix import extract_family as parser
        self.extract = parser

    def test_hyphen_separator(self):
        """Expects the text before a hyphen to be returned."""
        family = self.extract("Barlow-Bold")
        assert family == "Barlow"

    def test_underscore_separator(self):
        """Expects the text before an underscore to be returned."""
        family = self.extract("Aleo_Bold")
        assert family == "Aleo"

    def test_no_separator(self):
        """Expects a label without a separator to come back unchanged."""
        family = self.extract("Ultra")
        assert family == "Ultra"

    def test_multiple_hyphens(self):
        """Expects a multi-word CamelCase family to be kept whole.

        NOTE(review): despite the test name, this input contains only one
        hyphen -- confirm whether a multi-hyphen label was intended.
        """
        family = self.extract("BigShouldersText-Bold")
        assert family == "BigShouldersText"

    def test_italic_suffix(self):
        """Expects a combined weight+italic suffix to be dropped."""
        family = self.extract("CrimsonPro-BoldItalic")
        assert family == "CrimsonPro"

    def test_numeric_weight(self):
        """Expects a numeric weight suffix to be dropped."""
        family = self.extract("Roboto-300")
        assert family == "Roboto"


# ---------------------------------------------------------------------------
# Test: label filtering (._* macOS resource fork files)
# ---------------------------------------------------------------------------

class TestLabelFiltering:
    """Checks the dot-prefix filter applied to directory entries used as labels.

    NOTE(review): the filter expression lives in this test class rather than
    being imported from project code -- confirm it matches what the data
    loader actually does.
    """

    @staticmethod
    def _visible(entries):
        """Return, sorted, the entries whose names do not start with a dot."""
        kept = [name for name in entries if not name.startswith('.')]
        kept.sort()
        return kept

    def test_filters_dot_underscore(self):
        """``._*`` and ``.DS_Store`` entries are dropped."""
        listing = ["Arial", "._Arial", "Roboto", "._Roboto", ".DS_Store"]
        assert self._visible(listing) == ["Arial", "Roboto"]

    def test_no_hidden_files(self):
        """With nothing hidden, every entry survives and the result is sorted."""
        listing = ["Arial", "Roboto", "Inter"]
        assert self._visible(listing) == ["Arial", "Inter", "Roboto"]


# ---------------------------------------------------------------------------
# Test: typographic distance matrix
# ---------------------------------------------------------------------------

class TestTypographicDistance:
    """Checks on the matrix returned by ``compute_typographic_distance_matrix``.

    Each test passes a list of ``Family-Variant`` labels and indexes the
    result as ``dist[i, j]``, where ``i`` and ``j`` are positions in that list.
    """

    # Absolute slack for comparisons against the tier constants. The element
    # type of the returned matrix is not visible from this file, so the tier
    # values are not assumed to be bit-identical to Python float literals.
    _TOL = 1e-6

    def setup_method(self):
        # Deferred import: resolved when each test runs, not at module import.
        from confusion_matrix import compute_typographic_distance_matrix
        self.compute = compute_typographic_distance_matrix

    def test_same_variant_zero_distance(self):
        """Every label is at distance exactly 0 from itself."""
        labels = ["Roboto-Bold", "Roboto-Light", "Inter-Regular"]
        dist = self.compute(labels)
        for i in range(len(labels)):
            assert dist[i, i] == 0.0

    def test_symmetric(self):
        """``dist[i, j]`` equals ``dist[j, i]`` for every pair."""
        labels = ["Roboto-Bold", "Inter-Regular", "CrimsonPro-Light"]
        dist = self.compute(labels)
        n = len(labels)
        for i in range(n):
            for j in range(n):
                assert dist[i, j] == dist[j, i]

    def test_same_family_closer_than_cross_family(self):
        """Two variants of one family are closer than variants of two families."""
        labels = ["Roboto-Bold", "Roboto-Light", "Inter-Regular"]
        dist = self.compute(labels)
        # Roboto-Bold to Roboto-Light (same family) should be < Roboto-Bold to Inter
        assert dist[0, 1] < dist[0, 2]

    def test_same_category_closer_than_cross_category(self):
        """Two sans families are closer than a sans and a serif family."""
        # Inter (sans) vs Roboto (sans) should be closer than Inter (sans) vs CrimsonPro (serif)
        labels = ["Inter-Regular", "Roboto-Regular", "CrimsonPro-Regular"]
        dist = self.compute(labels)
        assert dist[0, 1] < dist[0, 2]

    def test_distance_tiers(self):
        """The three tiers take their expected values.

        Same family falls in [0.2, 0.4], same category is 0.7 and cross
        category is 1.0, each compared with a small absolute tolerance
        instead of exact float equality.
        """
        labels = ["Roboto-Regular", "Roboto-Bold", "Inter-Regular", "CrimsonPro-Regular"]
        dist = self.compute(labels)
        same_family = dist[0, 1]       # Roboto-Regular to Roboto-Bold
        same_category = dist[0, 2]     # Roboto to Inter (both sans)
        cross_category = dist[0, 3]    # Roboto (sans) to CrimsonPro (serif)
        tol = self._TOL
        assert 0.2 - tol <= same_family <= 0.4 + tol
        assert same_category == pytest.approx(0.7, abs=tol)
        assert cross_category == pytest.approx(1.0, abs=tol)

    def test_triangle_inequality(self):
        """``dist[i, j] <= dist[i, k] + dist[k, j]`` for every triple."""
        labels = ["Roboto-Regular", "Roboto-Bold", "Inter-Regular", "CrimsonPro-Light"]
        dist = self.compute(labels)
        n = len(labels)
        for i in range(n):
            for j in range(n):
                for k in range(n):
                    # 1e-10 absorbs rounding in the sum on the right-hand side.
                    assert dist[i, j] <= dist[i, k] + dist[k, j] + 1e-10


# ---------------------------------------------------------------------------
# Test: SWER computation
# ---------------------------------------------------------------------------

class TestSWER:
    """Checks the ``severity_weighted_error`` and ``family_accuracy`` metrics."""

    @staticmethod
    def _load():
        """Import and return ``(compute_severity_metrics, compute_typographic_distance_matrix)``."""
        from confusion_matrix import compute_severity_metrics, compute_typographic_distance_matrix
        return compute_severity_metrics, compute_typographic_distance_matrix

    def test_perfect_classifier(self):
        """Identical truth and prediction give zero severity and full family accuracy."""
        severity_metrics, distance_matrix = self._load()
        classes = ["Roboto-Bold", "Inter-Regular", "CrimsonPro-Light"]
        distances = distance_matrix(classes)
        truth = list(classes)
        predicted = list(classes)
        report = severity_metrics(truth, predicted, classes, distances)
        assert report["severity_weighted_error"] == 0.0
        assert report["family_accuracy"] == 1.0

    def test_within_family_error_low_severity(self):
        """A same-family mix-up scores lower severity than a cross-category one."""
        severity_metrics, distance_matrix = self._load()
        classes = ["Roboto-Regular", "Roboto-Bold", "CrimsonPro-Regular"]
        distances = distance_matrix(classes)
        truth = ["Roboto-Regular"]

        # Wrong variant of the right family.
        near_miss = severity_metrics(truth, ["Roboto-Bold"], classes, distances)
        within_severity = near_miss["severity_weighted_error"]

        # Wrong family in a different category.
        far_miss = severity_metrics(truth, ["CrimsonPro-Regular"], classes, distances)
        cross_severity = far_miss["severity_weighted_error"]

        assert within_severity < cross_severity

    def test_family_accuracy_counts_same_family_as_correct(self):
        """Predicting another variant of the true family still counts at family level."""
        severity_metrics, distance_matrix = self._load()
        classes = ["Roboto-Regular", "Roboto-Bold", "Inter-Regular"]
        distances = distance_matrix(classes)
        truth = ["Roboto-Regular", "Inter-Regular"]
        predicted = ["Roboto-Bold", "Inter-Regular"]  # wrong variant, right family
        report = severity_metrics(truth, predicted, classes, distances)
        assert report["family_accuracy"] == 1.0  # both correct at family level


# ---------------------------------------------------------------------------
# Test: ResNet forward returns dict
# ---------------------------------------------------------------------------

class TestResNetForward:
    """Forward pass through an untrained torchvision ResNet-50 with a 3-way head.

    NOTE(review): the ``{"loss", "logits"}`` dict asserted below is assembled
    by the test itself; no project model wrapper is called here -- confirm
    whether the real wrapper was meant to be exercised.
    """

    def test_returns_dict_with_loss_and_logits(self):
        """Logits have shape (batch, classes) and the loss is a scalar."""
        from torchvision.models import resnet50

        n_classes = 3
        batch = 2

        # Randomly initialised network; the classifier head is swapped for n_classes outputs.
        model = resnet50(weights=None)
        model.fc = torch.nn.Linear(model.fc.in_features, n_classes)

        # Minimal forward
        images = torch.randn(batch, 3, 224, 224)
        logits = model(images)
        targets = torch.tensor([0, 1])
        output = {
            "loss": torch.nn.functional.cross_entropy(logits, targets),
            "logits": logits,
        }

        assert isinstance(output, dict)
        for key in ("loss", "logits"):
            assert key in output
        assert output["logits"].shape == (batch, n_classes)
        assert output["loss"].dim() == 0  # scalar


# ---------------------------------------------------------------------------
# Test: preprocessing consistency
# ---------------------------------------------------------------------------

class TestPreprocessing:
    """Checks that the training and inference transforms agree on one image."""

    def test_train_and_inference_use_same_transform(self):
        """Both transforms produce tensors equal within ``atol=1e-6``.

        NOTE(review): ``from_pretrained`` is given a hub model id, so this
        presumably needs network access or a warm local cache -- confirm.
        """
        from handler import get_inference_transform
        from train_model import get_transform
        from transformers import AutoImageProcessor
        from PIL import Image

        checkpoint = "facebook/dinov2-base-imagenet1k-1-layer"
        processor = AutoImageProcessor.from_pretrained(checkpoint)
        edge = processor.size["shortest_edge"]

        # Solid-colour, non-square test image.
        sample = Image.new("RGB", (300, 100), color=(128, 64, 200))

        # Inference path: the transform is called on the image directly.
        inference_pixels = get_inference_transform(processor, edge)(sample)

        # Training path: the transform is called on an example dict and the
        # tensor is read back from its "pixel_values" entry.
        train_example = get_transform(processor, edge)({"image": sample})
        train_pixels = train_example["pixel_values"]

        assert torch.allclose(inference_pixels, train_pixels, atol=1e-6)


# ---------------------------------------------------------------------------
# Test: training smoke tests (1 epoch per mode on tiny dataset)
# ---------------------------------------------------------------------------

@pytest.fixture(scope="module")
def tiny_dataset():
    """Create a tiny on-disk image dataset for smoke testing.

    Layout: ``<root>/<split>/<class>/img_NNN.png`` with splits ``train``
    (10 images per class) and ``test`` (3 images per class), three classes,
    each image a 224x224 solid-colour RGB PNG. The colour depends only on the
    image index, so the same colours recur in every class.

    Yields:
        Path of the dataset root directory. The directory is removed when the
        module's tests finish, and also if building the dataset fails.
    """
    # Imported once here instead of on every iteration of the innermost loop.
    from PIL import Image

    class_names = ("FakeSerif_Regular", "FakeSans_Bold", "FakeMono_Light")
    images_per_split = {"train": 10, "test": 3}

    # The context manager cleans up on teardown and on a setup failure, so a
    # failed save cannot leave the temporary tree behind.
    with tempfile.TemporaryDirectory() as tmpdir:
        for split, n_images in images_per_split.items():
            for cls in class_names:
                cls_dir = os.path.join(tmpdir, split, cls)
                os.makedirs(cls_dir, exist_ok=True)
                for i in range(n_images):
                    color = ((i * 37) % 256, (i * 73) % 256, (i * 113) % 256)
                    img = Image.new("RGB", (224, 224), color=color)
                    img.save(os.path.join(cls_dir, f"img_{i:03d}.png"))
        yield tmpdir


def _run_training(tiny_dataset, extra_args):
    """Run ``train_model.py`` in a subprocess and verify it produced a model.

    The script is resolved relative to the current working directory and is
    launched with the interpreter that is running the tests, so it sees the
    same installed packages.

    Args:
        tiny_dataset: Dataset root directory, passed as ``--data_dir``.
        extra_args: Additional command-line arguments appended to the base
            command (for example ``["--linear_probe"]``).

    Returns:
        The path that was used as ``--output_dir``. It is a temporary
        directory that has already been deleted by the time the caller
        receives the path, so the value is informational only.

    Raises:
        AssertionError: If the script exits non-zero or no ``result_model``
            entry exists in the output directory.
        subprocess.TimeoutExpired: If the run exceeds 300 seconds.
    """
    import subprocess
    import sys

    with tempfile.TemporaryDirectory() as outdir:
        cmd = [
            # Fall back to "python3" only if the interpreter path is unknown.
            sys.executable or "python3", "train_model.py",
            "--data_dir", tiny_dataset,
            "--output_dir", outdir,
            "--batch_size", "4",
            "--epochs", "1",
            "--learning_rate", "1e-4",
            *extra_args,
        ]
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=300)
        # Only the last 500 characters of each stream go into the failure message.
        assert result.returncode == 0, f"Training failed:\nSTDOUT: {result.stdout[-500:]}\nSTDERR: {result.stderr[-500:]}"
        assert os.path.exists(os.path.join(outdir, "result_model")), "No result_model directory"
        return outdir


class TestTrainingSmoke:
    """One subprocess training run per flag combination, via ``_run_training``."""

    def test_lora_default(self, tiny_dataset):
        """Runs with no extra flags."""
        flags = []
        _run_training(tiny_dataset, flags)

    def test_lora_rank4(self, tiny_dataset):
        """Runs with ``--lora_rank 4 --lora_alpha 8``."""
        flags = ["--lora_rank", "4", "--lora_alpha", "8"]
        _run_training(tiny_dataset, flags)

    def test_lora_rank16(self, tiny_dataset):
        """Runs with ``--lora_rank 16 --lora_alpha 32``."""
        flags = ["--lora_rank", "16", "--lora_alpha", "32"]
        _run_training(tiny_dataset, flags)

    def test_linear_probe(self, tiny_dataset):
        """Runs with ``--linear_probe``."""
        flags = ["--linear_probe"]
        _run_training(tiny_dataset, flags)

    def test_full_finetune(self, tiny_dataset):
        """Runs with ``--full_finetune``."""
        flags = ["--full_finetune"]
        _run_training(tiny_dataset, flags)

    def test_resnet_baseline(self, tiny_dataset):
        """Runs with ``--resnet_baseline``."""
        flags = ["--resnet_baseline"]
        _run_training(tiny_dataset, flags)