diff --git a/tests/unit/test_qwen3_5_adapter.py b/tests/unit/test_qwen3_5_adapter.py index 8fd885174..d1a4a7b6a 100644 --- a/tests/unit/test_qwen3_5_adapter.py +++ b/tests/unit/test_qwen3_5_adapter.py @@ -18,11 +18,23 @@ ) from transformer_lens.tools.model_registry import HF_SUPPORTED_ARCHITECTURES +try: + from transformers import Qwen3_5ForCausalLM as _Qwen3_5ForCausalLM + from transformers import Qwen3_5TextConfig + + _QWEN3_5_AVAILABLE = True +except ImportError: + _QWEN3_5_AVAILABLE = False + # ============================================================================ # Test: Registration # ============================================================================ +@pytest.mark.skipif( + not _QWEN3_5_AVAILABLE, + reason="Qwen3_5TextConfig / Qwen3_5ForCausalLM not available in installed transformers", +) class TestQwen3_5Registration: """Verify the adapter is properly registered in all lookup tables.""" @@ -79,6 +91,10 @@ def _make_bridge_cfg(**overrides): # ============================================================================ +@pytest.mark.skipif( + not _QWEN3_5_AVAILABLE, + reason="Qwen3_5TextConfig / Qwen3_5ForCausalLM not available in installed transformers", +) class TestQwen3_5ComponentMapping: """Verify the component_mapping structure for Qwen3_5. @@ -134,18 +150,28 @@ def test_unembed_path(self, adapter): # ---- Block submodules ---- def test_block_submodules_keys(self, adapter): - """blocks submodules must contain ln1, ln2, mlp but NOT attn. + """blocks submodules must contain ln1, ln2, mlp, and optional attn + linear_attn.""" + submodules = adapter.component_mapping["blocks"].submodules + assert set(submodules.keys()) == {"ln1", "ln2", "mlp", "attn", "linear_attn"} - Critical correctness test: self_attn is absent on linear-attention - layers, so mapping attn as a block submodule would crash on those layers. 
- """ + def test_attn_is_optional(self, adapter): + """attn must be marked optional (absent on linear-attention layers).""" + submodules = adapter.component_mapping["blocks"].submodules + assert submodules["attn"].optional is True + + def test_linear_attn_is_optional(self, adapter): + """linear_attn must be marked optional (absent on full-attention layers).""" submodules = adapter.component_mapping["blocks"].submodules - assert set(submodules.keys()) == {"ln1", "ln2", "mlp"} + assert submodules["linear_attn"].optional is True + + def test_linear_attn_bridge_type(self, adapter): + """linear_attn must be a GatedDeltaNetBridge.""" + from transformer_lens.model_bridge.generalized_components.gated_delta_net import ( + GatedDeltaNetBridge, + ) - def test_no_attn_in_block_submodules(self, adapter): - """attn must NOT appear as a block submodule (hybrid architecture safety check).""" submodules = adapter.component_mapping["blocks"].submodules - assert "attn" not in submodules + assert isinstance(submodules["linear_attn"], GatedDeltaNetBridge) def test_ln1_path(self, adapter): """ln1 maps to input_layernorm.""" @@ -257,6 +283,10 @@ def test_weight_processing_conversions_empty(self, adapter): # ============================================================================ +@pytest.mark.skipif( + not _QWEN3_5_AVAILABLE, + reason="Qwen3_5TextConfig / Qwen3_5ForCausalLM not available in installed transformers", +) class TestQwen3_5ConfigAttributes: """Verify all cfg attributes are set correctly by the adapter.""" @@ -341,6 +371,10 @@ def test_n_key_value_heads_not_set_when_absent(self): # ============================================================================ +@pytest.mark.skipif( + not _QWEN3_5_AVAILABLE, + reason="Qwen3_5TextConfig / Qwen3_5ForCausalLM not available in installed transformers", +) class TestQwen3_5PreprocessWeights: """Verify preprocess_weights correctly slices q_proj.weight per-head. 
@@ -478,14 +512,6 @@ def test_weight_processing_conversions_is_empty_dict(self, adapter): # Test: Integration (Phase A+B) # ============================================================================ -try: - from transformers import Qwen3_5ForCausalLM as _Qwen3_5ForCausalLM - from transformers import Qwen3_5TextConfig - - _QWEN3_5_AVAILABLE = True -except ImportError: - _QWEN3_5_AVAILABLE = False - def _make_tiny_hf_model(): """Create a tiny Qwen3_5ForCausalLM for integration testing. diff --git a/tests/unit/test_qwen3_next_adapter.py b/tests/unit/test_qwen3_next_adapter.py index 1a2842e7b..516d7a8b5 100644 --- a/tests/unit/test_qwen3_next_adapter.py +++ b/tests/unit/test_qwen3_next_adapter.py @@ -135,19 +135,28 @@ def test_unembed_path(self, adapter): # ---- Block submodules ---- def test_block_submodules_keys(self, adapter): - """blocks submodules must contain ln1, ln2, mlp but NOT attn. + """blocks submodules must contain ln1, ln2, mlp, and optional attn + linear_attn.""" + submodules = adapter.component_mapping["blocks"].submodules + assert set(submodules.keys()) == {"ln1", "ln2", "mlp", "attn", "linear_attn"} - This is a critical correctness test: self_attn is absent on - linear-attention layers, so mapping attn as a block submodule - would crash on those layers. 
- """ + def test_attn_is_optional(self, adapter): + """attn must be marked optional (absent on linear-attention layers).""" + submodules = adapter.component_mapping["blocks"].submodules + assert submodules["attn"].optional is True + + def test_linear_attn_is_optional(self, adapter): + """linear_attn must be marked optional (absent on full-attention layers).""" submodules = adapter.component_mapping["blocks"].submodules - assert set(submodules.keys()) == {"ln1", "ln2", "mlp"} + assert submodules["linear_attn"].optional is True + + def test_linear_attn_bridge_type(self, adapter): + """linear_attn must be a GatedDeltaNetBridge.""" + from transformer_lens.model_bridge.generalized_components.gated_delta_net import ( + GatedDeltaNetBridge, + ) - def test_no_attn_in_block_submodules(self, adapter): - """attn must NOT appear as a block submodule (hybrid architecture safety check).""" submodules = adapter.component_mapping["blocks"].submodules - assert "attn" not in submodules + assert isinstance(submodules["linear_attn"], GatedDeltaNetBridge) def test_ln1_path(self, adapter): """ln1 maps to input_layernorm.""" diff --git a/transformer_lens/benchmarks/component_outputs.py b/transformer_lens/benchmarks/component_outputs.py index ba2d03edd..504825ce1 100644 --- a/transformer_lens/benchmarks/component_outputs.py +++ b/transformer_lens/benchmarks/component_outputs.py @@ -311,8 +311,12 @@ def benchmark_all_components( n_layers = self.cfg.n_layers for layer_idx in range(n_layers): - # Recursively test each subcomponent and its nested subcomponents + # Get the actual block to check which submodules were bound + actual_block = getattr(self.bridge_model, block_type)[layer_idx] for subcomp_name, subcomponent in blocks_component.submodules.items(): + # Skip optional submodules absent on this layer (hybrid architectures) + if subcomp_name not in actual_block._modules: + continue comp_path = f"{block_type}.{layer_idx}.{subcomp_name}" self._test_component_recursive( comp_path, 
subcomponent, test_inputs, results, skip_components diff --git a/transformer_lens/benchmarks/weight_processing.py b/transformer_lens/benchmarks/weight_processing.py index 5a7fafd65..62e561b25 100644 --- a/transformer_lens/benchmarks/weight_processing.py +++ b/transformer_lens/benchmarks/weight_processing.py @@ -638,10 +638,24 @@ def benchmark_mlp_output_centering( message="Skipped for tiny/test model (random weights don't center meaningfully)", ) - # Check if this is an MoE model - MoE models don't have a single W_out weight + # Find an MLP-like submodule (may be "mlp", "shared_mlp", etc.) from transformer_lens.model_bridge.generalized_components.moe import MoEBridge - if isinstance(bridge.blocks[0].mlp, MoEBridge): + mlp_module = None + block = bridge.blocks[0] + for name in ("mlp", "shared_mlp"): + if name in block._modules: + mlp_module = block._modules[name] + break + if mlp_module is None: + return BenchmarkResult( + name="mlp_output_centering", + severity=BenchmarkSeverity.WARNING, + message="No MLP submodule found on block 0", + passed=False, + ) + + if isinstance(mlp_module, MoEBridge): return BenchmarkResult( name="mlp_output_centering", severity=BenchmarkSeverity.INFO, @@ -651,11 +665,10 @@ def benchmark_mlp_output_centering( # Check if W_out exists and is accessible (HT format or bridge format) w_out = None - if hasattr(bridge.blocks[0].mlp, "W_out"): - w_out = bridge.blocks[0].mlp.W_out - elif hasattr(bridge.blocks[0].mlp, "out"): - # Bridge format: mlp.out is a LinearBridge wrapping nn.Linear - out_module = bridge.blocks[0].mlp.out + if hasattr(mlp_module, "W_out"): + w_out = mlp_module.W_out + elif hasattr(mlp_module, "out"): + out_module = mlp_module.out if hasattr(out_module, "original_component") and hasattr( out_module.original_component, "weight" ): diff --git a/transformer_lens/model_bridge/component_setup.py b/transformer_lens/model_bridge/component_setup.py index a2986d585..7821d0354 100644 --- 
a/transformer_lens/model_bridge/component_setup.py +++ b/transformer_lens/model_bridge/component_setup.py @@ -100,9 +100,10 @@ def setup_submodules( else: remote_path = submodule.name is_optional = getattr(submodule, "optional", False) - # Fast path: first segment absent → skip without entering get_remote_component + # Fast path: first segment absent or None → skip first_segment = remote_path.split(".")[0] - if is_optional and not hasattr(original_model, first_segment): + first_value = getattr(original_model, first_segment, None) + if is_optional and first_value is None: logger.debug( "Optional '%s' (path '%s') absent on %s", module_name, diff --git a/transformer_lens/model_bridge/generalized_components/__init__.py b/transformer_lens/model_bridge/generalized_components/__init__.py index fb789cc30..c2c7a121b 100644 --- a/transformer_lens/model_bridge/generalized_components/__init__.py +++ b/transformer_lens/model_bridge/generalized_components/__init__.py @@ -35,6 +35,9 @@ from transformer_lens.model_bridge.generalized_components.alibi_joint_qkv_attention import ( ALiBiJointQKVAttentionBridge, ) +from transformer_lens.model_bridge.generalized_components.gated_delta_net import ( + GatedDeltaNetBridge, +) from transformer_lens.model_bridge.generalized_components.gated_mlp import ( GatedMLPBridge, ) diff --git a/transformer_lens/model_bridge/generalized_components/attention.py b/transformer_lens/model_bridge/generalized_components/attention.py index 05d5e0982..5608ca2d8 100644 --- a/transformer_lens/model_bridge/generalized_components/attention.py +++ b/transformer_lens/model_bridge/generalized_components/attention.py @@ -59,6 +59,7 @@ def __init__( requires_position_embeddings: bool = False, requires_attention_mask: bool = False, attention_mask_4d: bool = False, + optional: bool = False, ): """Initialize the attention bridge. 
@@ -82,7 +83,11 @@ def __init__( if conversion_rule is None: conversion_rule = AttentionAutoConversion(config) super().__init__( - name, config=config, submodules=submodules or {}, conversion_rule=conversion_rule + name, + config=config, + submodules=submodules or {}, + conversion_rule=conversion_rule, + optional=optional, ) self.hook_attn_scores = HookPoint() self.hook_pattern = HookPoint() diff --git a/transformer_lens/model_bridge/generalized_components/gated_delta_net.py b/transformer_lens/model_bridge/generalized_components/gated_delta_net.py new file mode 100644 index 000000000..1e13fe4bf --- /dev/null +++ b/transformer_lens/model_bridge/generalized_components/gated_delta_net.py @@ -0,0 +1,303 @@ +"""GatedDeltaNet bridge for Qwen3.5/Qwen3Next linear-attention layers. + +Reimplements forward (prefill only) to expose mech-interp-relevant intermediate +states. Falls back to HF native forward during autoregressive generation where +cache state management is required. +""" +from typing import TYPE_CHECKING, Any, Dict, Optional + +import torch +import torch.nn.functional as F + +from transformer_lens.hook_points import HookPoint +from transformer_lens.model_bridge.generalized_components.base import ( + GeneralizedComponent, +) + +if TYPE_CHECKING: + from transformer_lens.ActivationCache import ActivationCache + + +class GatedDeltaNetBridge(GeneralizedComponent): + """Bridge for GatedDeltaNet linear-attention with full hook decomposition. + + Hooks (prefill, in execution order): + hook_in: input hidden_states [batch, seq, d_model] + hook_q_pre_conv: Q after projection, before conv [batch, seq, n_k_heads, head_k_dim] + hook_k_pre_conv: K after projection, before conv [batch, seq, n_k_heads, head_k_dim] + hook_v_pre_conv: V after projection, before conv [batch, seq, n_v_heads, head_v_dim] + hook_q: Q after conv, pre-GQA-expansion [batch, seq, n_k_heads, head_k_dim] + Note: on standard attn layers, hook_q is post-projection. 
Here it's + post-conv — use hook_q_pre_conv for the projection-only output. + hook_k: K after conv [batch, seq, n_k_heads, head_k_dim] + hook_v: V after conv [batch, seq, n_v_heads, head_v_dim] + hook_beta_logit: pre-sigmoid write gate logit, per v-head [batch, seq, n_v_heads] + hook_beta: write strength sigmoid(b), per v-head [batch, seq, n_v_heads] + hook_log_decay: log-space decay g (NEGATIVE; multiplicative decay = exp(g)), + per v-head [batch, seq, n_v_heads] + hook_recurrence_out: output of linear recurrence [batch, seq, n_v_heads, head_v_dim] + hook_gate_input: z tensor (pre-silu) for GatedRMSNorm [batch, seq, n_v_heads, head_v_dim] + hook_out: final output to residual stream [batch, seq, d_model] + + During generation (cache_params present), only hook_in/hook_out fire. + + Property aliases: + W_in_proj_qkvz, W_in_proj_ba, W_out_proj, A_log, dt_bias + """ + + hook_aliases = { + "hook_linear_attn_in": "hook_in", + "hook_linear_attn_out": "hook_out", + } + + property_aliases = { + "W_in_proj_qkvz": "in_proj_qkvz.weight", + "W_in_proj_ba": "in_proj_ba.weight", + "W_out_proj": "out_proj.weight", + "A_log": "A_log", + "dt_bias": "dt_bias", + } + + def __init__( + self, + name: str, + config: Optional[Any] = None, + submodules: Optional[Dict[str, GeneralizedComponent]] = None, + **kwargs, + ): + super().__init__(name, config=config, submodules=submodules or {}, **kwargs) + # Pre-conv (after projection split, before causal conv mixes positions) + self.hook_q_pre_conv = HookPoint() + self.hook_k_pre_conv = HookPoint() + self.hook_v_pre_conv = HookPoint() + # Post-conv (pre-GQA-expansion, pre-recurrence) + self.hook_q = HookPoint() + self.hook_k = HookPoint() + self.hook_v = HookPoint() + # Gate parameters (per v-head) + self.hook_beta_logit = HookPoint() + self.hook_beta = HookPoint() + self.hook_log_decay = HookPoint() + # Recurrence output + gated norm input + self.hook_recurrence_out = HookPoint() + self.hook_gate_input = HookPoint() + + def forward(self, *args: 
Any, **kwargs: Any) -> Any: + if self.original_component is None: + raise RuntimeError(f"Original component not set for {self.name}.") + + if kwargs.get("cache_params") is not None: + return self._native_forward(*args, **kwargs) + return self._hooked_forward(*args, **kwargs) + + def _native_forward(self, *args: Any, **kwargs: Any) -> Any: + """Delegate to HF with hook_in/hook_out only (generation path).""" + assert self.original_component is not None + if "hidden_states" in kwargs: + kwargs["hidden_states"] = self.hook_in(kwargs["hidden_states"]) + elif len(args) > 0 and isinstance(args[0], torch.Tensor): + args = (self.hook_in(args[0]),) + args[1:] + + output = self.original_component(*args, **kwargs) + + if isinstance(output, tuple) and len(output) > 0: + first = output[0] + if isinstance(first, torch.Tensor): + return (self.hook_out(first),) + output[1:] + return output + if isinstance(output, torch.Tensor): + return self.hook_out(output) + return output + + def _hooked_forward(self, *args: Any, **kwargs: Any) -> Any: + """Reimplemented forward exposing all intermediate states (prefill).""" + hf: Any = self.original_component + + if "hidden_states" in kwargs: + hidden_states = kwargs["hidden_states"] + elif len(args) > 0 and isinstance(args[0], torch.Tensor): + hidden_states = args[0] + else: + raise ValueError("Could not find hidden_states") + + attention_mask = kwargs.get("attention_mask") + if attention_mask is not None: + # Inline masking — avoids hard dependency on qwen3_next module + hidden_states = hidden_states * attention_mask.unsqueeze(-1) + + hidden_states = self.hook_in(hidden_states) + batch_size, seq_len, _ = hidden_states.shape + + # --- Projections (two layouts: fused vs split) --- + if hasattr(hf, "in_proj_qkvz"): + # Qwen3Next: fused Q+K+V+Z projection, fused beta+alpha + projected_qkvz = hf.in_proj_qkvz(hidden_states) + projected_ba = hf.in_proj_ba(hidden_states) + query, key, value, z, b, a = hf.fix_query_key_value_ordering( + projected_qkvz, 
projected_ba + ) + else: + # Qwen3.5: separate projections (in_proj_qkv, in_proj_z, in_proj_b, in_proj_a) + mixed_qkv_flat = hf.in_proj_qkv(hidden_states) + z = hf.in_proj_z(hidden_states).reshape(batch_size, seq_len, -1, hf.head_v_dim) + b = hf.in_proj_b(hidden_states) + a = hf.in_proj_a(hidden_states) + # Split QKV and reshape to per-head for pre-conv hooks + q_flat, k_flat, v_flat = torch.split( + mixed_qkv_flat, [hf.key_dim, hf.key_dim, hf.value_dim], dim=-1 + ) + query = q_flat.reshape(batch_size, seq_len, -1, hf.head_k_dim) + key = k_flat.reshape(batch_size, seq_len, -1, hf.head_k_dim) + value = v_flat.reshape(batch_size, seq_len, -1, hf.head_v_dim) + + # --- Pre-conv hooks (per-head shape, before conv mixes positions) --- + query = self.hook_q_pre_conv(query) + key = self.hook_k_pre_conv(key) + value = self.hook_v_pre_conv(value) + + # Flatten for conv + query, key, value = (x.reshape(x.shape[0], x.shape[1], -1) for x in (query, key, value)) + + # --- Causal Convolution --- + mixed_qkv = torch.cat((query, key, value), dim=-1).transpose(1, 2) + if hf.causal_conv1d_fn is not None: + mixed_qkv = hf.causal_conv1d_fn( + x=mixed_qkv, + weight=hf.conv1d.weight.squeeze(1), + bias=hf.conv1d.bias, + activation=hf.activation, + seq_idx=None, + ) + else: + mixed_qkv = F.silu(hf.conv1d(mixed_qkv)[:, :, :seq_len]) + mixed_qkv = mixed_qkv.transpose(1, 2) + + # Split post-conv into per-head Q, K, V + query, key, value = torch.split( + mixed_qkv, + [hf.key_dim, hf.key_dim, hf.value_dim], + dim=-1, + ) + query = query.reshape(batch_size, seq_len, -1, hf.head_k_dim) + key = key.reshape(batch_size, seq_len, -1, hf.head_k_dim) + value = value.reshape(batch_size, seq_len, -1, hf.head_v_dim) + + # --- Post-conv hooks (pre-GQA-expansion, pre-recurrence) --- + query = self.hook_q(query) + key = self.hook_k(key) + value = self.hook_v(value) + + # --- Gate parameters (per v-head) --- + b = self.hook_beta_logit(b) + beta = self.hook_beta(b.sigmoid()) + + # g is log-space decay 
(NEGATIVE); multiplicative decay = exp(g) + g = -hf.A_log.float().exp() * F.softplus(a.float() + hf.dt_bias) + g = self.hook_log_decay(g) + + # GQA expansion (Q/K from n_k_heads → n_v_heads) + if hf.num_v_heads // hf.num_k_heads > 1: + repeat = hf.num_v_heads // hf.num_k_heads + query = query.repeat_interleave(repeat, dim=2) + key = key.repeat_interleave(repeat, dim=2) + + # --- Core linear recurrence (opaque fused kernel) --- + core_out, _ = hf.chunk_gated_delta_rule( + query, + key, + value, + g=g, + beta=beta, + initial_state=None, + output_final_state=False, + use_qk_l2norm_in_kernel=True, + ) + core_out = self.hook_recurrence_out(core_out) + + # --- Gated RMSNorm: norm(core_out) * silu(z) --- + z = self.hook_gate_input(z) + z_shape = z.shape + core_out = hf.norm( + core_out.reshape(-1, core_out.shape[-1]), + z.reshape(-1, z.shape[-1]), + ) + core_out = core_out.reshape(z_shape).reshape(batch_size, seq_len, -1) + + # --- Output projection --- + output = hf.out_proj(core_out) + return self.hook_out(output) + + def compute_effective_attention( + self, + cache: "ActivationCache", + layer_idx: int, + ) -> torch.Tensor: + """Materialize the effective attention matrix from cached hook values. + + With the delta-rule erase term ``(I - beta_t * k_t @ k_t^T)`` dropped, the recurrence is:: + + S_t = exp(g_t) * S_{t-1} + beta_t * v_t @ k_t^T + o_t = S_t^T @ q_t + + The effective attention M[i,j] = contribution of input j to output i:: + + M[i,j] = (q_i^T @ k_j) * beta_j * prod_{t=j+1}^{i} exp(g_t) + + **Approximation note:** Besides dropping the erase term, the fused kernel applies L2-normalization to Q + and K internally (``use_qk_l2norm_in_kernel=True``). The hooked Q/K are + pre-normalization, so this reconstruction diverges when Q/K norms vary + significantly across positions/heads. Accuracy is best when Q/K norms + are roughly uniform (common after training converges). + + Args: + cache: ActivationCache from ``run_with_cache``. + layer_idx: Block index for this linear_attn layer. 
+ + Returns: + ``[batch, n_v_heads, seq, seq]`` causal matrix (upper triangle zero). + + Cost is O(batch * n_heads * seq^2); use on short sequences. + """ + prefix = f"blocks.{layer_idx}.linear_attn" + q_key = f"{prefix}.hook_q" + k_key = f"{prefix}.hook_k" + beta_key = f"{prefix}.hook_beta" + decay_key = f"{prefix}.hook_log_decay" + + for key in [q_key, k_key, beta_key, decay_key]: + if key not in cache: + raise RuntimeError( + f"compute_effective_attention needs {key!r} in cache. " + "Run run_with_cache() on the bridge first." + ) + + # [batch, seq, n_k_heads, head_k_dim] — pre-GQA-expansion + q = cache[q_key].float() + k = cache[k_key].float() + beta = cache[beta_key].float() # [batch, seq, n_v_heads] + g = cache[decay_key].float() # [batch, seq, n_v_heads] + + # GQA expansion to match n_v_heads + if q.shape[2] < beta.shape[-1]: + repeat = beta.shape[-1] // q.shape[2] + q = q.repeat_interleave(repeat, dim=2) + k = k.repeat_interleave(repeat, dim=2) + + batch, seq, n_heads, d_head = q.shape + + # QK similarity: [batch, n_heads, seq_i, seq_j] + q_perm = q.permute(0, 2, 1, 3) + k_perm = k.permute(0, 2, 1, 3) + qk = torch.matmul(q_perm, k_perm.transpose(-2, -1)) + + # Cumulative decay: L[i,j] = exp(sum g[j+1..i]) + g_perm = g.permute(0, 2, 1) # [batch, n_heads, seq] + cumsum_g = torch.cumsum(g_perm, dim=-1) + L_log = cumsum_g[:, :, :, None] - cumsum_g[:, :, None, :] + + causal_mask = torch.tril(torch.ones(seq, seq, dtype=torch.bool, device=q.device)) + L = torch.where(causal_mask[None, None], torch.exp(L_log), torch.zeros_like(L_log)) + + # M[i,j] = qk[i,j] * beta[j] * L[i,j] + beta_col = beta.permute(0, 2, 1)[:, :, None, :] + return qk * beta_col * L diff --git a/transformer_lens/model_bridge/generalized_components/position_embeddings_attention.py b/transformer_lens/model_bridge/generalized_components/position_embeddings_attention.py index 33f6dd21f..135ab0d17 100644 --- a/transformer_lens/model_bridge/generalized_components/position_embeddings_attention.py +++ 
b/transformer_lens/model_bridge/generalized_components/position_embeddings_attention.py @@ -15,6 +15,7 @@ import torch import transformers.models.gemma2.modeling_gemma2 as gemma2_module +from transformer_lens.hook_points import HookPoint from transformer_lens.model_bridge.generalized_components.attention import ( AttentionBridge, ) @@ -112,21 +113,29 @@ class PositionEmbeddingsAttentionBridge(PositionEmbeddingHooksMixin, AttentionBr """ def __init__( - self, name: str, config: Any, submodules: Optional[Dict[str, Any]] = None, **kwargs + self, + name: str, + config: Any, + submodules: Optional[Dict[str, Any]] = None, + optional: bool = False, + # Accepted for caller compatibility (Granite passes these explicitly) + # but always forced to True — this bridge reimplements attention. + requires_attention_mask: bool = True, + requires_position_embeddings: bool = True, + **kwargs, # absorb any other AttentionBridge kwargs callers may pass ): - """Initialize Gemma-3 attention bridge. - - Args: - name: Component name - config: Model configuration - submodules: Dictionary of subcomponents - **kwargs: Additional arguments passed to AttentionBridge - """ - kwargs["requires_position_embeddings"] = True - kwargs["requires_attention_mask"] = True - kwargs["maintain_native_attention"] = True - super().__init__(name, config, submodules, **kwargs) + super().__init__( + name, + config, + submodules, + requires_position_embeddings=True, + requires_attention_mask=True, + maintain_native_attention=True, + optional=optional, + ) self._init_position_embedding_hooks() + if getattr(config, "gated_q_proj", False): + self.hook_q_gate = HookPoint() def set_original_component(self, component: torch.nn.Module) -> None: """Set the original HF component and register for rotary hook firing. 
@@ -201,19 +210,34 @@ def forward(self, *args: Any, **kwargs: Any) -> Any: key_states = hf_attn.k_proj(hidden_states) value_states = hf_attn.v_proj(hidden_states) + # Gated q_proj (Qwen3.5/Qwen3Next): q_proj outputs [Q|gate] interleaved + # per head. cfg.gated_q_proj is set by the adapter. The actual split only + # triggers if the output is 2x the standard width (n_heads * head_dim). + # In processed mode, preprocess_weights slices q_proj to standard width + # so this naturally passes through. + q_gate = None + if getattr(self.config, "gated_q_proj", False): + q_dim = query_states.shape[-1] + n_heads = getattr(self.config, "n_heads", q_dim // head_dim) + standard_q_dim = n_heads * head_dim + if q_dim == standard_q_dim * 2: + query_states, q_gate = torch.chunk( + query_states.view(*input_shape, -1, head_dim * 2), 2, dim=-1 + ) + q_gate = q_gate.reshape(*input_shape, -1) + query_states = query_states.reshape(*input_shape, -1) + has_q_norm = hasattr(hf_attn, "q_norm") and hf_attn.q_norm is not None has_k_norm = hasattr(hf_attn, "k_norm") and hf_attn.k_norm is not None applied_pre_reshape_norm = False if has_q_norm: try: - # Try pre-reshape norm (OLMo 2 style: norm on flat [batch, seq, hidden]) query_states = hf_attn.q_norm(query_states) if has_k_norm: key_states = hf_attn.k_norm(key_states) applied_pre_reshape_norm = True except RuntimeError: - # Shape mismatch — this model uses post-reshape norms pass query_states = query_states.view(hidden_shape).transpose(1, 2) @@ -306,6 +330,12 @@ def forward(self, *args: Any, **kwargs: Any) -> Any: attn_output = attn_output.transpose(1, 2).contiguous() attn_output = attn_output.reshape(*input_shape, -1) + # --- Gated attention (Qwen3.5/Qwen3Next) --- + if q_gate is not None: + if hasattr(self, "hook_q_gate"): + q_gate = self.hook_q_gate(q_gate) + attn_output = attn_output * torch.sigmoid(q_gate) + # --- Output Projection --- # Different architectures name this differently: o_proj (Llama, Gemma, Qwen), # dense (Phi), out_proj 
(others) diff --git a/transformer_lens/model_bridge/supported_architectures/granite.py b/transformer_lens/model_bridge/supported_architectures/granite.py index f85ef850d..c46081b0b 100644 --- a/transformer_lens/model_bridge/supported_architectures/granite.py +++ b/transformer_lens/model_bridge/supported_architectures/granite.py @@ -51,6 +51,7 @@ def _setup_common_config(self, cfg: Any) -> None: self.cfg.gated_mlp = True self.cfg.attn_only = False self.cfg.uses_rms_norm = True + self.cfg.default_prepend_bos = False self.cfg.eps_attr = "variance_epsilon" self.default_config = { @@ -65,11 +66,12 @@ def _setup_common_config(self, cfg: Any) -> None: self.default_config["n_key_value_heads"] = cfg.n_key_value_heads self.cfg.n_key_value_heads = cfg.n_key_value_heads - def _build_attention_bridge(self) -> PositionEmbeddingsAttentionBridge: + def _build_attention_bridge(self, optional: bool = False) -> PositionEmbeddingsAttentionBridge: """Build the standard Granite attention bridge.""" return PositionEmbeddingsAttentionBridge( name="self_attn", config=self.cfg, + optional=optional, submodules={ "q": LinearBridge(name="q_proj"), "k": LinearBridge(name="k_proj"), @@ -124,11 +126,11 @@ def setup_component_testing(self, hf_model: Any, bridge_model: Any = None) -> No if bridge_model is not None and hasattr(bridge_model, "blocks"): for block in bridge_model.blocks: - if hasattr(block, "attn"): + if "attn" in block._modules: block.attn.set_rotary_emb(rotary_emb) try: attn_bridge = self.get_generalized_component("blocks.0.attn") attn_bridge.set_rotary_emb(rotary_emb) - except (AttributeError, KeyError): + except (AttributeError, KeyError, ValueError): pass diff --git a/transformer_lens/model_bridge/supported_architectures/granite_moe_hybrid.py b/transformer_lens/model_bridge/supported_architectures/granite_moe_hybrid.py index 2c776365b..53229252e 100644 --- a/transformer_lens/model_bridge/supported_architectures/granite_moe_hybrid.py +++ 
b/transformer_lens/model_bridge/supported_architectures/granite_moe_hybrid.py @@ -1,13 +1,11 @@ """Granite MoE Hybrid architecture adapter. -GraniteMoeHybridForCausalLM is a hybrid Mamba + Attention architecture with -Sparse Mixture of Experts. Layers alternate between Mamba SSM blocks and -standard attention blocks, with a shared MLP and optional sparse MoE on -every layer. - -Since self_attn is None on Mamba layers and mamba is None on attention -layers, we only map submodules that exist on ALL layers (norms, shared_mlp, -block_sparse_moe). The HF native forward handles mamba/attention dispatch. +Hybrid Mamba2 + Attention with Sparse MoE. Most layers are Mamba SSM blocks; +a few are standard attention (determined by config.layer_types). Every layer +has a shared MLP and optional sparse MoE. + +Both attention and Mamba are mapped as optional — each present only on its +respective layer type. Mamba hooks expose in_proj, conv1d, and inner_norm. """ from typing import Any @@ -21,53 +19,55 @@ MoEBridge, RMSNormalizationBridge, RotaryEmbeddingBridge, + SSM2MixerBridge, UnembeddingBridge, ) +from transformer_lens.model_bridge.generalized_components.depthwise_conv1d import ( + DepthwiseConv1DBridge, +) from transformer_lens.model_bridge.supported_architectures.granite import ( GraniteArchitectureAdapter, ) class GraniteMoeHybridArchitectureAdapter(GraniteArchitectureAdapter): - """Architecture adapter for IBM Granite MoE Hybrid models. - - Hybrid Mamba2 + Attention architecture with Sparse MoE. Most layers are Mamba - SSM blocks; a few are standard attention (determined by config.layer_types). + """Hybrid Mamba2 + Attention with Sparse MoE. - Since self_attn is None on Mamba layers and mamba is None on attention layers, - we only map submodules present on ALL layers (norms, shared_mlp, MoE). The HF - native forward handles mamba/attention dispatch internally. 
- - Hook coverage: - - Block-level: hook_resid_pre, hook_resid_post on every layer - - Normalization: ln1 (input_layernorm), ln2 (post_attention_layernorm) - - MLP: shared_mlp input/output hooks - - MoE: block_sparse_moe input/output and router_scores hooks - - Attention/Mamba internals are NOT individually hooked (conditional per layer) + Attention is optional (absent on Mamba layers). shared_mlp and MoE are + universal. Inherits Granite config and attention bridge construction. """ def __init__(self, cfg: Any) -> None: - """Initialize the Granite MoE Hybrid architecture adapter.""" - # Call ArchitectureAdapter.__init__ directly, not GraniteArchitectureAdapter.__init__, - # because we need to customize the setup sequence ArchitectureAdapter.__init__(self, cfg) - self._setup_common_config(cfg) - # Hybrid may use "rope" or "nope" (no positional embeddings) pos_emb_type = getattr(cfg, "position_embedding_type", "rope") if pos_emb_type != "rope": self.cfg.positional_embedding_type = "none" - # No attention weight conversions — attn Q/K/V aren't mapped as submodules + self.supports_fold_ln = False self.weight_processing_conversions = {} self.component_mapping = self._build_component_mapping() + def _build_mamba_bridge(self) -> SSM2MixerBridge: + """Mamba-2 mixer bridge with in_proj, conv1d, inner_norm hooks.""" + return SSM2MixerBridge( + name="mamba", + config=self.cfg, + optional=True, + submodules={ + "in_proj": LinearBridge(name="in_proj"), + "conv1d": DepthwiseConv1DBridge(name="conv1d"), + "inner_norm": LinearBridge(name="norm"), + }, + ) + def _build_component_mapping(self) -> dict: - """Build component mapping with only universal (all-layer) submodules.""" - block_submodules = { + block_submodules: dict = { "ln1": RMSNormalizationBridge(name="input_layernorm", config=self.cfg), "ln2": RMSNormalizationBridge(name="post_attention_layernorm", config=self.cfg), + "attn": self._build_attention_bridge(optional=True), + "mamba": self._build_mamba_bridge(), 
"shared_mlp": MLPBridge( name="shared_mlp", config=self.cfg, @@ -87,12 +87,9 @@ def _build_component_mapping(self) -> dict: config=self.cfg, ) - mapping = { + mapping: dict = { "embed": EmbeddingBridge(name="model.embed_tokens"), - "blocks": BlockBridge( - name="model.layers", - submodules=block_submodules, - ), + "blocks": BlockBridge(name="model.layers", submodules=block_submodules), "ln_final": RMSNormalizationBridge(name="model.norm", config=self.cfg), "unembed": UnembeddingBridge(name="lm_head", config=self.cfg), } @@ -101,10 +98,3 @@ def _build_component_mapping(self) -> dict: mapping["rotary_emb"] = RotaryEmbeddingBridge(name="model.rotary_emb", config=self.cfg) return mapping - - def setup_component_testing(self, hf_model: Any, bridge_model: Any = None) -> None: - """No-op for hybrid models. - - Hybrid models don't map attention as a submodule (it's conditional per - layer), so there are no rotary embedding references to set up. - """ diff --git a/transformer_lens/model_bridge/supported_architectures/qwen3.py b/transformer_lens/model_bridge/supported_architectures/qwen3.py index 8dcc1d6d3..4676d1175 100644 --- a/transformer_lens/model_bridge/supported_architectures/qwen3.py +++ b/transformer_lens/model_bridge/supported_architectures/qwen3.py @@ -1,7 +1,14 @@ -"""Qwen3 architecture adapter.""" +"""Qwen3 architecture adapter. + +Base adapter for the Qwen3 model family. Provides shared config setup, +attention bridge construction, and setup_component_testing used by +Qwen3, Qwen3.5, and Qwen3Next variants. 
+""" from typing import Any +import torch + from transformer_lens.model_bridge.architecture_adapter import ArchitectureAdapter from transformer_lens.model_bridge.generalized_components import ( BlockBridge, @@ -12,33 +19,33 @@ RotaryEmbeddingBridge, UnembeddingBridge, ) +from transformer_lens.model_bridge.generalized_components.gated_delta_net import ( + GatedDeltaNetBridge, +) from transformer_lens.model_bridge.generalized_components.position_embeddings_attention import ( PositionEmbeddingsAttentionBridge, ) class Qwen3ArchitectureAdapter(ArchitectureAdapter): - """Architecture adapter for Qwen3 models. - - Qwen3 is architecturally similar to Gemma3: - - Uses RMSNorm for all normalizations - - Has Q/K normalization within attention (RMSNorm on head dimension) - - Uses rotary position embeddings (RoPE) - - Requires position_embeddings and attention_mask in forward pass - - Uses gated MLP (gate_proj + up_proj -> down_proj) - - No biases on any linear layers - - Key differences from Qwen2: - - Qwen3 has q_norm and k_norm layers in attention (Qwen2 doesn't) - - Qwen3 requires position_embeddings parameter (like Gemma3) - - Uses PositionEmbeddingsAttentionBridge instead of AttentionBridge + """Architecture adapter for Qwen3 dense models. + + RMSNorm, RoPE, GQA, Q/K head norms, gated MLP. No biases. + Serves as base class for Qwen3.5 and Qwen3Next hybrid variants. 
""" - def __init__(self, cfg: Any) -> None: - """Initialize the Qwen3 architecture adapter.""" + def __init__(self, cfg: Any, *, hybrid: bool = False) -> None: super().__init__(cfg) - - # Set config variables for weight processing + self._setup_qwen3_config(cfg) + if hybrid: + self.supports_fold_ln = False + self.weight_processing_conversions: dict = {} + else: + self.weight_processing_conversions = {**self._qkvo_weight_conversions()} + self.component_mapping = self._build_component_mapping(hybrid=hybrid) + + def _setup_qwen3_config(self, cfg: Any) -> None: + """Config shared across all Qwen3 variants (dense, hybrid, MoE).""" self.cfg.normalization_type = "RMS" self.cfg.positional_embedding_type = "rotary" self.cfg.final_rms = True @@ -46,85 +53,105 @@ def __init__(self, cfg: Any) -> None: self.cfg.attn_only = False self.cfg.uses_rms_norm = True self.cfg.default_prepend_bos = False - - # Use eager attention to support output_attentions for hook_attn_scores and hook_pattern - # SDPA doesn't support output_attentions, which is required for HookedTransformer compatibility self.cfg.attn_implementation = "eager" - self.weight_processing_conversions = { - **self._qkvo_weight_conversions(), + if hasattr(cfg, "n_key_value_heads") and cfg.n_key_value_heads is not None: + self.cfg.n_key_value_heads = cfg.n_key_value_heads + + def _build_attention_bridge(self, optional: bool = False) -> PositionEmbeddingsAttentionBridge: + """Standard Qwen3 attention bridge with Q/K norms.""" + return PositionEmbeddingsAttentionBridge( + name="self_attn", + config=self.cfg, + optional=optional, + submodules={ + "q": LinearBridge(name="q_proj"), + "k": LinearBridge(name="k_proj"), + "v": LinearBridge(name="v_proj"), + "o": LinearBridge(name="o_proj"), + "q_norm": RMSNormalizationBridge(name="q_norm", config=self.cfg), + "k_norm": RMSNormalizationBridge(name="k_norm", config=self.cfg), + }, + ) + + def _build_mlp_bridge(self): + """Dense gated MLP (gate_proj + up_proj -> down_proj). 
Override for MoE.""" + return GatedMLPBridge( + name="mlp", + config=self.cfg, + submodules={ + "gate": LinearBridge(name="gate_proj"), + "in": LinearBridge(name="up_proj"), + "out": LinearBridge(name="down_proj"), + }, + ) + + def _build_linear_attn_bridge(self, optional: bool = False) -> GatedDeltaNetBridge: + """GatedDeltaNet linear-attention bridge for hybrid variants.""" + return GatedDeltaNetBridge( + name="linear_attn", + config=self.cfg, + optional=optional, + ) + + def _build_component_mapping(self, *, hybrid: bool = False) -> dict: + """Parametric component mapping. hybrid=True adds optional linear_attn.""" + block_submodules: dict = { + "ln1": RMSNormalizationBridge(name="input_layernorm", config=self.cfg), + "ln2": RMSNormalizationBridge(name="post_attention_layernorm", config=self.cfg), + "attn": self._build_attention_bridge(optional=hybrid), + "mlp": self._build_mlp_bridge(), } - - # Set up component mapping - self.component_mapping = { + if hybrid: + block_submodules["linear_attn"] = self._build_linear_attn_bridge(optional=True) + return { "embed": EmbeddingBridge(name="model.embed_tokens"), "rotary_emb": RotaryEmbeddingBridge(name="model.rotary_emb", config=self.cfg), - "blocks": BlockBridge( - name="model.layers", - submodules={ - "ln1": RMSNormalizationBridge(name="input_layernorm", config=self.cfg), - "ln2": RMSNormalizationBridge(name="post_attention_layernorm", config=self.cfg), - "attn": PositionEmbeddingsAttentionBridge( - name="self_attn", - config=self.cfg, - submodules={ - "q": LinearBridge(name="q_proj"), - "k": LinearBridge(name="k_proj"), - "v": LinearBridge(name="v_proj"), - "o": LinearBridge(name="o_proj"), - "q_norm": RMSNormalizationBridge(name="q_norm", config=self.cfg), - "k_norm": RMSNormalizationBridge(name="k_norm", config=self.cfg), - }, - ), - "mlp": GatedMLPBridge( - name="mlp", - config=self.cfg, - submodules={ - "gate": LinearBridge(name="gate_proj"), - "in": LinearBridge(name="up_proj"), - "out": 
LinearBridge(name="down_proj"), - }, - ), - }, - ), + "blocks": BlockBridge(name="model.layers", submodules=block_submodules), "ln_final": RMSNormalizationBridge(name="model.norm", config=self.cfg), "unembed": UnembeddingBridge(name="lm_head"), } def setup_component_testing(self, hf_model: Any, bridge_model: Any = None) -> None: - """Set up rotary embedding references for Qwen3 component testing. - - Qwen3 uses RoPE (Rotary Position Embeddings). We set the rotary_emb on - all attention bridge instances for component testing. - - We also force the HF model to use "eager" attention to match the bridge's - implementation. The bridge uses "eager" to support output_attentions for hooks. - - Args: - hf_model: The HuggingFace Qwen3 model instance - bridge_model: The TransformerBridge model (if available, set rotary_emb on actual instances) - """ - # Get rotary embedding instance from the model + """Set eager attn on HF model and rotary_emb on attention bridges.""" rotary_emb = hf_model.model.rotary_emb - # Force HF model to use "eager" attention to match bridge implementation - # Bridge uses "eager" to support output_attentions for hook compatibility if hasattr(hf_model, "config") and hasattr(hf_model.config, "_attn_implementation"): hf_model.config._attn_implementation = "eager" - # Also set on all attention layers if hasattr(hf_model, "model") and hasattr(hf_model.model, "layers"): for layer in hf_model.model.layers: if hasattr(layer, "self_attn") and hasattr(layer.self_attn, "config"): layer.self_attn.config._attn_implementation = "eager" - # Set rotary_emb on actual bridge instances in bridge_model if available if bridge_model is not None and hasattr(bridge_model, "blocks"): - # Set on each layer's actual attention bridge instance for block in bridge_model.blocks: - if hasattr(block, "attn"): + if "attn" in block._modules: block.attn.set_rotary_emb(rotary_emb) - # Also set on the template for get_generalized_component() calls - attn_bridge = 
self.get_generalized_component("blocks.0.attn") - attn_bridge.set_rotary_emb(rotary_emb) + # Set on template for get_generalized_component() calls + # Set on template — may not exist in hybrid adapters + mapping = self.component_mapping or {} + blocks_template = mapping.get("blocks") if isinstance(mapping, dict) else None + if blocks_template and "attn" in getattr(blocks_template, "submodules", {}): + try: + attn_template = self.get_generalized_component("blocks.0.attn") + attn_template.set_rotary_emb(rotary_emb) + except (ValueError, AttributeError, KeyError): + pass + + @staticmethod + def _preprocess_gated_q_proj( + state_dict: dict[str, torch.Tensor], n_heads: int, d_head: int + ) -> dict[str, torch.Tensor]: + """Slice query half from gated q_proj.weight (interleaved per-head layout). + + q_proj.weight has shape (n_heads * d_head * 2, hidden_size) with + interleaved [query, gate] rows per head. Extracts query-only half. + """ + keys_to_update = [k for k in state_dict if k.endswith(".self_attn.q_proj.weight")] + for key in keys_to_update: + w = state_dict[key] + w = w.view(n_heads, d_head * 2, -1) + state_dict[key] = w[:, :d_head, :].reshape(n_heads * d_head, -1) + return state_dict diff --git a/transformer_lens/model_bridge/supported_architectures/qwen3_5.py b/transformer_lens/model_bridge/supported_architectures/qwen3_5.py index b1e71e9f3..1ef0913bf 100644 --- a/transformer_lens/model_bridge/supported_architectures/qwen3_5.py +++ b/transformer_lens/model_bridge/supported_architectures/qwen3_5.py @@ -1,175 +1,57 @@ -"""Qwen3_5 architecture adapter. +"""Qwen3.5 architecture adapter. -Qwen3_5ForCausalLM is a hybrid linear-attention + full-attention architecture -with a dense gated MLP on every layer. Layers follow a repeating pattern of -3 GatedDeltaNet (linear attention) layers followed by 1 standard full-attention -layer (every 4th layer by default). 
- -Since self_attn is absent on linear-attention layers, we only map submodules -that exist on ALL layers (norms, MLP). The HF native forward handles -linear/full attention dispatch internally, and GatedMLPBridge maps the dense -gate_proj/up_proj/down_proj structure on every layer. - -Hook coverage: -- Block-level: hook_resid_pre, hook_resid_post on every layer -- Normalization: ln1 (input_layernorm), ln2 (post_attention_layernorm) -- MLP: hook_in, hook_out via GatedMLPBridge (gate_proj, up_proj, down_proj) -- Attention internals are NOT individually hooked (self_attn absent on - linear-attention layers; mapping it would crash on those layers) - -Optional parameters: -- n_key_value_heads: only set when using GQA (num_key_value_heads != num_attention_heads) +Hybrid linear-attention (GatedDeltaNet) + full-attention with dense gated MLP. +3 linear-attn layers per 1 full-attn layer. Extends Qwen3 base with +optional attention mapping and fold_ln disabled. """ from typing import Any import torch -from transformer_lens.model_bridge.architecture_adapter import ArchitectureAdapter -from transformer_lens.model_bridge.generalized_components import ( - BlockBridge, - EmbeddingBridge, - GatedMLPBridge, - LinearBridge, - RMSNormalizationBridge, - RotaryEmbeddingBridge, - UnembeddingBridge, +from transformer_lens.model_bridge.supported_architectures.qwen3 import ( + Qwen3ArchitectureAdapter, ) -class Qwen3_5ArchitectureAdapter(ArchitectureAdapter): - """Architecture adapter for Qwen3_5 models. 
- - Qwen3_5ForCausalLM is a hybrid linear-attention + full-attention - architecture with dense gated MLPs, sharing the same hybrid design as - Qwen3Next but replacing the sparse MoE MLP with a standard dense MLP: - - Uses RMSNorm for all normalizations - - Uses rotary position embeddings (RoPE) with partial rotation - - Every 4th layer is a full-attention layer (self_attn); the rest are - GatedDeltaNet linear-attention layers (linear_attn) - - Uses dense gated MLP (gate_proj + up_proj -> down_proj) on ALL layers - - No biases on any linear layers - - Full-attention layers have Q/K normalization (q_norm, k_norm) - - Full-attention q_proj outputs n_heads * head_dim * 2 (interleaved - query+gate layout); the preprocess_weights method slices the query half - - Since self_attn is absent on linear-attention layers, only universally - present submodules (norms, MLP) are mapped as block submodules. The HF - native forward handles per-layer attention dispatch internally. +class Qwen3_5ArchitectureAdapter(Qwen3ArchitectureAdapter): + """Hybrid linear-attention + full-attention with dense gated MLP. - Optional parameters: - - n_key_value_heads: set when num_key_value_heads != num_attention_heads (GQA) + Inherits Qwen3 config/attention/MLP structure. 
Differences: + - Attention + linear_attn are optional (per-layer type) + - Gated q_proj (2x wide) sliced by preprocess_weights for weight analysis """ - def __init__(self, cfg: Any) -> None: - """Initialize the Qwen3_5 architecture adapter.""" - super().__init__(cfg) - - # Core config attributes - self.cfg.normalization_type = "RMS" - self.cfg.positional_embedding_type = "rotary" - self.cfg.final_rms = True - self.cfg.gated_mlp = True - self.cfg.attn_only = False - self.cfg.uses_rms_norm = True - self.cfg.default_prepend_bos = False - - # Disable fold_ln: ln1 is followed by self_attn on full-attention - # layers and by linear_attn (GatedDeltaNet) on linear-attention layers, - # but neither is mapped as a bridge submodule (see class docstring for - # why). With no bridge-mapped target to fold into, the standard fold_ln - # pass leaves LN weights in an inconsistent state and the processed - # bridge output diverges from the unprocessed / HF output. Skipping - # fold_ln keeps processed-mode forward passes numerically equivalent. - self.supports_fold_ln = False + _MIN_TRANSFORMERS_VERSION = "5.2.0" - # Use eager attention to support output_attentions for hook_attn_scores - # and hook_pattern. SDPA doesn't support output_attentions. 
- self.cfg.attn_implementation = "eager" - - # GQA: only set n_key_value_heads when using grouped-query attention - if hasattr(cfg, "n_key_value_heads") and cfg.n_key_value_heads is not None: - self.cfg.n_key_value_heads = cfg.n_key_value_heads + def __init__(self, cfg: Any) -> None: + import transformers - self.weight_processing_conversions: dict = {} - self.component_mapping: dict = { - "embed": EmbeddingBridge(name="model.embed_tokens"), - "rotary_emb": RotaryEmbeddingBridge(name="model.rotary_emb", config=self.cfg), - "blocks": BlockBridge( - name="model.layers", - submodules={ - "ln1": RMSNormalizationBridge(name="input_layernorm", config=self.cfg), - "ln2": RMSNormalizationBridge(name="post_attention_layernorm", config=self.cfg), - # Dense gated MLP present on every layer (unlike Qwen3Next's MoE). - # gate_proj + up_proj feed into down_proj via SwiGLU activation. - "mlp": GatedMLPBridge( - name="mlp", - config=self.cfg, - submodules={ - "gate": LinearBridge(name="gate_proj"), - "in": LinearBridge(name="up_proj"), - "out": LinearBridge(name="down_proj"), - }, - ), - }, - ), - "ln_final": RMSNormalizationBridge(name="model.norm", config=self.cfg), - "unembed": UnembeddingBridge(name="lm_head"), - } + if transformers.__version__ < self._MIN_TRANSFORMERS_VERSION: + raise ImportError( + f"Qwen3.5 requires transformers >= {self._MIN_TRANSFORMERS_VERSION} " + f"(installed: {transformers.__version__}). " + f"Upgrade with: pip install 'transformers>={self._MIN_TRANSFORMERS_VERSION}'" + ) + setattr(cfg, "gated_q_proj", True) + super().__init__(cfg, hybrid=True) def prepare_loading(self, model_name: str, model_kwargs: dict) -> None: - """Swap the multimodal Qwen3_5Config for its text-only Qwen3_5TextConfig. - - Published Qwen3.5 checkpoints (e.g. Qwen/Qwen3.5-0.8B) carry - model_type='qwen3_5' and architectures=['Qwen3_5ForConditionalGeneration']. 
- AutoModelForCausalLM would load the full VLM (Qwen3_5ForConditionalGeneration) - with its vision tower, wasting memory and failing the bridge. + """Swap multimodal Qwen3_5Config for text-only Qwen3_5TextConfig. - Instead we replace model_kwargs['config'] with the nested text_config so - AutoModelForCausalLM loads Qwen3_5ForCausalLM (text only). + Published checkpoints carry architectures=['Qwen3_5ForConditionalGeneration']. + We replace config with text_config so AutoModelForCausalLM loads the + text-only Qwen3_5ForCausalLM. """ config = model_kwargs.get("config") if config is not None and hasattr(config, "text_config"): model_kwargs["config"] = config.text_config - def setup_component_testing(self, hf_model: Any, bridge_model: Any = None) -> None: - """No-op for hybrid models. - - Hybrid models don't map attention as a block submodule (self_attn is - absent on linear-attention layers), so there are no rotary embedding - references to set up. - - Note: to find which layers are full_attention at runtime, use: - layer_types = getattr(hf_model.config, "layer_types", []) - first_full_attn_idx = next( - i for i, t in enumerate(layer_types) if t == "full_attention" - ) - Do NOT use hf_model.config.full_attention_interval -- it is not stored - on the config object (consumed during __init__ to build layer_types). - """ - def preprocess_weights(self, state_dict: dict[str, torch.Tensor]) -> dict[str, torch.Tensor]: - """Slice query half from q_proj.weight (interleaved per-head layout). - - In Qwen3_5, q_proj.weight has shape (n_heads * head_dim * 2, hidden_size). - Rows are organized as per-head interleaved: - head_0_query (d_head rows), head_0_gate (d_head rows), - head_1_query (d_head rows), head_1_gate (d_head rows), ... - - A naive first-half slice would be wrong. We must reshape by head, then - take the first d_head rows of each head (the query half). + """Slice query half from gated q_proj.weight for weight-space analysis. 
- Note: since self_attn is NOT currently mapped as a bridge submodule, - these weights will not be loaded by the bridge. This method is included - for correctness and forward-compatibility. + In processed mode, W_Q is the pure query projection (for composition + scores, logit lens). Gate signal available in unprocessed mode on + full-attention layers via blocks.N.attn.hook_q_gate. """ - n_heads = self.cfg.n_heads - d_head = self.cfg.d_head - keys_to_update = [k for k in state_dict if k.endswith(".self_attn.q_proj.weight")] - for key in keys_to_update: - w = state_dict[key] # shape: (n_heads * d_head * 2, hidden_size) - # Reshape to expose per-head layout - w = w.view(n_heads, d_head * 2, -1) - # Take only the first d_head rows of each head (query half) - state_dict[key] = w[:, :d_head, :].reshape(n_heads * d_head, -1) - return state_dict + return self._preprocess_gated_q_proj(state_dict, self.cfg.n_heads, self.cfg.d_head) diff --git a/transformer_lens/model_bridge/supported_architectures/qwen3_next.py b/transformer_lens/model_bridge/supported_architectures/qwen3_next.py index 53e18dce1..31e1be3cd 100644 --- a/transformer_lens/model_bridge/supported_architectures/qwen3_next.py +++ b/transformer_lens/model_bridge/supported_architectures/qwen3_next.py @@ -1,163 +1,34 @@ """Qwen3Next architecture adapter. -Qwen3NextForCausalLM is a hybrid linear-attention + full-attention architecture -with a sparse Mixture-of-Experts MLP on every layer. Layers alternate between -GatedDeltaNet (linear attention) and standard full attention blocks, while the -MLP is always a Qwen3NextSparseMoeBlock (gate router + batched experts + -shared expert). - -Since self_attn is absent on linear-attention layers, we only map submodules -that exist on ALL layers (norms, MLP). The HF native forward handles -linear/full attention dispatch internally, and MoEBridge delegates the entire -MoE forward (including router, experts, and shared expert) to the native -implementation. 
- -Hook coverage: -- Block-level: hook_resid_pre, hook_resid_post on every layer -- Normalization: ln1 (input_layernorm), ln2 (post_attention_layernorm) -- MLP: hook_in, hook_out on the MoE block (MoEBridge) -- Attention internals are NOT individually hooked (self_attn absent on - linear-attention layers; mapping it would crash on those layers) -- Expert-level internals are NOT individually hooked (batched expert params - live inside Qwen3NextExperts; MoEBridge delegates to HF forward) - -Optional parameters: -- n_key_value_heads: only set when using GQA (num_key_value_heads != num_attention_heads) +Hybrid linear-attention (GatedDeltaNet) + full-attention with sparse MoE MLP. +3 linear-attn layers per 1 full-attn layer. Extends Qwen3 base with +optional attention mapping, MoE MLP, and fold_ln disabled. """ from typing import Any import torch -from transformer_lens.model_bridge.architecture_adapter import ArchitectureAdapter -from transformer_lens.model_bridge.generalized_components import ( - BlockBridge, - EmbeddingBridge, - MoEBridge, - RMSNormalizationBridge, - RotaryEmbeddingBridge, - UnembeddingBridge, +from transformer_lens.model_bridge.generalized_components import MoEBridge +from transformer_lens.model_bridge.supported_architectures.qwen3 import ( + Qwen3ArchitectureAdapter, ) -class Qwen3NextArchitectureAdapter(ArchitectureAdapter): - """Architecture adapter for Qwen3Next models. - - Qwen3NextForCausalLM is a hybrid linear-attention + full-attention - architecture with sparse MoE MLPs, sharing the same design as Qwen3.5: - - Uses RMSNorm for all normalizations - - Uses rotary position embeddings (RoPE) with partial rotation - - Every 4th layer is a full-attention layer (self_attn); the rest are - GatedDeltaNet linear-attention layers (linear_attn) - - Uses Qwen3NextSparseMoeBlock on ALL layers (decoder_sparse_step=1 and - mlp_only_layers=[] on every real checkpoint). 
The MoE block contains a - top-K router, batched Qwen3NextExperts (experts.gate_up_proj / - experts.down_proj as 3D tensors), plus a shared_expert (gated MLP) and - shared_expert_gate. Each expert is internally a gated MLP. - - No biases on any linear layers - - Full-attention layers have Q/K normalization (q_norm, k_norm) - - Full-attention q_proj outputs n_heads * head_dim * 2 (interleaved - query+gate layout); the preprocess_weights method slices the query half +class Qwen3NextArchitectureAdapter(Qwen3ArchitectureAdapter): + """Hybrid linear-attention + full-attention with sparse MoE MLP. - Since self_attn is absent on linear-attention layers, only universally - present submodules (norms, MLP) are mapped as block submodules. The HF - native forward handles per-layer attention dispatch internally, and - MoEBridge delegates the MoE forward pass (including router + experts + - shared expert) to the native Qwen3NextSparseMoeBlock implementation. - - Optional parameters: - - n_key_value_heads: set when num_key_value_heads != num_attention_heads (GQA) + Same hybrid design as Qwen3.5 but with MoE instead of dense MLP. """ def __init__(self, cfg: Any) -> None: - """Initialize the Qwen3Next architecture adapter.""" - super().__init__(cfg) - - # Core config attributes - self.cfg.normalization_type = "RMS" - self.cfg.positional_embedding_type = "rotary" - self.cfg.final_rms = True - self.cfg.gated_mlp = True - self.cfg.attn_only = False - self.cfg.uses_rms_norm = True - self.cfg.default_prepend_bos = False - - # Disable fold_ln: ln1 is followed by self_attn on full-attention - # layers and by linear_attn (GatedDeltaNet) on linear-attention layers, - # but neither is mapped as a bridge submodule (see class docstring for - # why). With no bridge-mapped target to fold into, the standard fold_ln - # pass leaves LN weights in an inconsistent state and the processed - # bridge output diverges from the unprocessed / HF output. 
Skipping - # fold_ln keeps processed-mode forward passes numerically equivalent. - self.supports_fold_ln = False - - # Use eager attention to support output_attentions for hook_attn_scores - # and hook_pattern. SDPA doesn't support output_attentions. - self.cfg.attn_implementation = "eager" + setattr(cfg, "gated_q_proj", True) + super().__init__(cfg, hybrid=True) - # GQA: only set n_key_value_heads when using grouped-query attention - if hasattr(cfg, "n_key_value_heads") and cfg.n_key_value_heads is not None: - self.cfg.n_key_value_heads = cfg.n_key_value_heads - - self.weight_processing_conversions: dict = {} - self.component_mapping: dict = { - "embed": EmbeddingBridge(name="model.embed_tokens"), - "rotary_emb": RotaryEmbeddingBridge(name="model.rotary_emb", config=self.cfg), - "blocks": BlockBridge( - name="model.layers", - submodules={ - "ln1": RMSNormalizationBridge(name="input_layernorm", config=self.cfg), - "ln2": RMSNormalizationBridge(name="post_attention_layernorm", config=self.cfg), - # Qwen3NextSparseMoeBlock has a custom Qwen3NextTopKRouter - # (not an nn.Linear) as `gate`, plus batched experts and a - # shared expert. MoEBridge wraps the whole MoE module and - # delegates to HF's native forward, so we don't enumerate - # the internal structure here. - "mlp": MoEBridge(name="mlp", config=self.cfg), - }, - ), - "ln_final": RMSNormalizationBridge(name="model.norm", config=self.cfg), - "unembed": UnembeddingBridge(name="lm_head"), - } - - def setup_component_testing(self, hf_model: Any, bridge_model: Any = None) -> None: - """No-op for hybrid models. - - Hybrid models don't map attention as a block submodule (self_attn is - absent on linear-attention layers), so there are no rotary embedding - references to set up. 
- - Note: to find which layers are full_attention at runtime, use: - layer_types = getattr(hf_model.config, "layer_types", []) - first_full_attn_idx = next( - i for i, t in enumerate(layer_types) if t == "full_attention" - ) - Do NOT use hf_model.config.full_attention_interval -- it is not stored - on the config object (consumed during __init__ to build layer_types). - """ + def _build_mlp_bridge(self): + """Sparse MoE MLP (router + batched experts + shared expert).""" + return MoEBridge(name="mlp", config=self.cfg) def preprocess_weights(self, state_dict: dict[str, torch.Tensor]) -> dict[str, torch.Tensor]: - """Slice query half from q_proj.weight (interleaved per-head layout). - - In Qwen3Next, q_proj.weight has shape (n_heads * head_dim * 2, hidden_size). - Rows are organized as per-head interleaved: - head_0_query (d_head rows), head_0_gate (d_head rows), - head_1_query (d_head rows), head_1_gate (d_head rows), ... - - A naive first-half slice would be wrong. We must reshape by head, then - take the first d_head rows of each head (the query half). - - Note: since self_attn is NOT currently mapped as a bridge submodule, - these weights will not be loaded by the bridge. This method is included - for correctness and forward-compatibility. 
- """ - n_heads = self.cfg.n_heads - d_head = self.cfg.d_head - keys_to_update = [k for k in state_dict if k.endswith(".self_attn.q_proj.weight")] - for key in keys_to_update: - w = state_dict[key] # shape: (n_heads * d_head * 2, hidden_size) - # Reshape to expose per-head layout - w = w.view(n_heads, d_head * 2, -1) - # Take only the first d_head rows of each head (query half) - state_dict[key] = w[:, :d_head, :].reshape(n_heads * d_head, -1) - return state_dict + """Slice query half from gated q_proj.weight for weight-space analysis.""" + return self._preprocess_gated_q_proj(state_dict, self.cfg.n_heads, self.cfg.d_head) diff --git a/transformer_lens/tools/model_registry/data/architecture_gaps.json b/transformer_lens/tools/model_registry/data/architecture_gaps.json index f3eb11de9..6261a9a65 100644 --- a/transformer_lens/tools/model_registry/data/architecture_gaps.json +++ b/transformer_lens/tools/model_registry/data/architecture_gaps.json @@ -1,18 +1,18 @@ { - "generated_at": "2026-04-10", + "generated_at": "2026-04-14", "scan_info": { - "total_scanned": 5436, + "total_scanned": 5633, "task_filter": "text-generation", "min_downloads": 500, - "scan_duration_seconds": 3.9 + "scan_duration_seconds": 4.2 }, - "total_unsupported_architectures": 401, - "total_unsupported_models": 1459, + "total_unsupported_architectures": 416, + "total_unsupported_models": 1400, "gaps": [ { "architecture_id": "Qwen3_5ForConditionalGeneration", - "total_models": 67, - "total_downloads": 140710, + "total_models": 72, + "total_downloads": 146334, "min_param_count": 211968832, "sample_models": [ "Tesslate/OmniCoder-9B", @@ -20,18 +20,37 @@ "nightmedia/Qwen3.5-27B-Claude-4.6-Opus-Reasoning-Distilled-qx64-hi-mlx", "Brooooooklyn/Qwen3.5-27B-unsloth-mlx", "aifeifei798/Qwen3.5-Queen-27B", - "Jackrong/Qwen3.5-2B-Claude-4.6-Opus-Reasoning-Distilled", "Brooooooklyn/Qwen3.5-9B-unsloth-mlx", - "alexcovo/qwen35-9b-mlx-turboquant-tq3", "bigatuna/Qwen3.5-9b-Sushi-Coder-RL-MLX", - 
"Jackrong/Qwen3.5-9B-Neo" + "Jackrong/Qwen3.5-2B-Claude-4.6-Opus-Reasoning-Distilled", + "alexcovo/qwen35-9b-mlx-turboquant-tq3", + "Oysiyl/qwen3.5-27b-unslop-good-lora-v1" + ], + "relevancy_score": 91.0 + }, + { + "architecture_id": "Gemma4ForConditionalGeneration", + "total_models": 64, + "total_downloads": 90296, + "min_param_count": 738022691, + "sample_models": [ + "dealignai/Gemma-4-31B-JANG_4M-Uncensored", + "0xSero/gemma-4-21b-a4b-it-REAP", + "InfinimindCreations/gemma-4-E4B-it-uncensored", + "TrevorJS/gemma-4-26B-A4B-it-uncensored", + "WWTCyberLab/gemma-4-31B-it-abliterated", + "WWTCyberLab/gemma-4-26B-A4B-it-abliterated", + "TrevorJS/gemma-4-31B-it-uncensored", + "TrevorJS/gemma-4-E4B-it-uncensored", + "InfinimindCreations/gemma-4-31B-it-uncensored", + "TrevorJS/gemma-4-E2B-it-uncensored" ], - "relevancy_score": 91.5 + "relevancy_score": 84.9 }, { "architecture_id": "DeepseekV3ForCausalLM", - "total_models": 48, - "total_downloads": 6449394, + "total_models": 46, + "total_downloads": 6840308, "min_param_count": 1656048, "sample_models": [ "deepseek-ai/DeepSeek-R1", @@ -39,43 +58,24 @@ "deepseek-ai/DeepSeek-V3", "deepseek-ai/DeepSeek-V3-0324", "moonshotai/Kimi-K2-Instruct-0905", - "deepseek-ai/DeepSeek-V3.1", - "ai-sage/GigaChat3-10B-A1.8B", "moonshotai/Kimi-K2-Instruct", + "deepseek-ai/DeepSeek-V3.1", "trl-internal-testing/tiny-DeepseekV3ForCausalLM", - "trl-internal-testing/tiny-DeepseekV3ForCausalLM-0528" - ], - "relevancy_score": 87.2 - }, - { - "architecture_id": "Qwen3MoeForCausalLM", - "total_models": 45, - "total_downloads": 5469133, - "min_param_count": 2574656, - "sample_models": [ - "Qwen/Qwen3-30B-A3B", - "Qwen/Qwen3-Coder-30B-A3B-Instruct", - "Qwen/Qwen3-30B-A3B-Instruct-2507", - "Qwen/Qwen3-235B-A22B", - "trl-internal-testing/tiny-Qwen3MoeForCausalLM", - "Qwen/Qwen3-30B-A3B-Thinking-2507", - "Qwen/Qwen3-235B-A22B-Instruct-2507", - "Qwen/Qwen3-Coder-480B-A35B-Instruct", - "Qwen/Qwen3-235B-A22B-Thinking-2507", - 
"NVFP4/Qwen3-30B-A3B-Instruct-2507-FP4" + "trl-internal-testing/tiny-DeepseekV3ForCausalLM-0528", + "moonshotai/Moonlight-16B-A3B-Instruct" ], - "relevancy_score": 84.9 + "relevancy_score": 83.1 }, { "architecture_id": "NemotronHForCausalLM", - "total_models": 40, - "total_downloads": 3187865, + "total_models": 41, + "total_downloads": 3587883, "min_param_count": 4221480, "sample_models": [ "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16", - "nvidia/NVIDIA-Nemotron-Nano-9B-v2-Japanese", "nvidia/NVIDIA-Nemotron-Nano-9B-v2", "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16", + "nvidia/NVIDIA-Nemotron-Nano-9B-v2-Japanese", "nvidia/Nemotron-Cascade-2-30B-A3B", "nvidia/NVIDIA-Nemotron-3-Nano-4B-BF16", "unsloth/NVIDIA-Nemotron-3-Nano-4B", @@ -83,69 +83,31 @@ "empero-ai/openNemo-9B", "cpagac/Nemotron-Nano-9B-v2-heretic" ], - "relevancy_score": 80.3 - }, - { - "architecture_id": "Qwen3_5ForCausalLM", - "total_models": 52, - "total_downloads": 81342, - "min_param_count": 752393024, - "sample_models": [ - "lukey03/Qwen3.5-9B-abliterated", - "GoodStartLabs/gin-rummy-hbc-qwen3.5-0.8b", - "aifeifei798/Darkidol-Ballad-27B", - "brocchirodrigo/anotaai-ajuda-qwen3_5_Q4", - "kai-os/Carnice-9b", - "llmfan46/Darkidol-Ballad-27B-ultra-uncensored-heretic-v1", - "llmfan46/Darkidol-Ballad-27B-ultra-uncensored-heretic-v2", - "Phonsiri/Qwen3.5-9B-Thai-Law-Base", - "continuum-ai/qwen3.5-4b-code-forged", - "aifeifei798/Darkidol-Ballad-9B" - ], - "relevancy_score": 80.2 + "relevancy_score": 78.6 }, { "architecture_id": "Lfm2ForCausalLM", "total_models": 40, - "total_downloads": 1395683, + "total_downloads": 1626247, "min_param_count": 274754048, "sample_models": [ "farbodtavakkoli/OTel-LLM-1.2B-IT", "LiquidAI/LFM2.5-1.2B-Instruct", "LiquidAI/LFM2-1.2B", - "LiquidAI/LFM2-350M", "LiquidAI/LFM2.5-350M", + "LiquidAI/LFM2-350M", "LiquidAI/LFM2.5-1.2B-Thinking", "LiquidAI/LFM2-2.6B-Exp", "LiquidAI/LFM2.5-1.2B-Base", "LiquidAI/LFM2-700M", "unsloth/LFM2.5-1.2B-Instruct" ], - "relevancy_score": 78.4 - }, - { 
- "architecture_id": "Gemma4ForConditionalGeneration", - "total_models": 37, - "total_downloads": 51866, - "min_param_count": 2084387402, - "sample_models": [ - "dealignai/Gemma-4-31B-JANG_4M-Uncensored", - "0xSero/gemma-4-21b-a4b-it-REAP", - "InfinimindCreations/gemma-4-E4B-it-uncensored", - "lthn/lemma", - "TrevorJS/gemma-4-26B-A4B-it-uncensored", - "livadies/gemma-4-E2B-Ghetto-NF4", - "Greytechai/Gemma-4-31B-JANG_4M-CRACK", - "WWTCyberLab/gemma-4-31B-it-abliterated", - "WWTCyberLab/gemma-4-26B-A4B-it-abliterated", - "InfinimindCreations/gemma-4-31B-it-uncensored" - ], - "relevancy_score": 65.1 + "relevancy_score": 76.3 }, { "architecture_id": "QWenLMHeadModel", "total_models": 22, - "total_downloads": 495498, + "total_downloads": 522223, "min_param_count": 19545408, "sample_models": [ "cckevinn/SeeClick", @@ -156,91 +118,53 @@ "Qwen/Qwen-1_8B-Chat", "Qwen/Qwen-14B-Chat", "Qwen/Qwen-14B", - "Xingyu-Zheng/Qwen-VL-Chat", - "Qwen/Qwen-72B" - ], - "relevancy_score": 64.0 - }, - { - "architecture_id": "InternLM2ForCausalLM", - "total_models": 23, - "total_downloads": 253936, - "min_param_count": 24052864, - "sample_models": [ - "internlm/internlm2-chat-7b", - "internlm/internlm2_5-7b-chat", - "internlm/internlm2-7b", - "internlm/internlm2-20b", - "internlm/internlm2-base-7b", - "internlm/internlm2-chat-20b", - "internlm/internlm2-base-20b", - "chujiezheng/internlm2-chat-20b-ExPO", - "chujiezheng/internlm2-chat-7b-ExPO", - "internlm/internlm2-1_8b" + "Qwen/Qwen-Audio-Chat", + "Xingyu-Zheng/Qwen-VL-Chat" ], - "relevancy_score": 63.2 + "relevancy_score": 62.5 }, { - "architecture_id": "GPTBigCodeForCausalLM", - "total_models": 24, - "total_downloads": 109509, - "min_param_count": 1845928, + "architecture_id": "DeepseekV32ForCausalLM", + "total_models": 12, + "total_downloads": 9006409, + "min_param_count": 136559748, "sample_models": [ - "bigcode/gpt_bigcode-santacoder", - "bigcode/tiny_starcoder_py", - "bigcode/starcoder", - "bigcode/starcoderbase-1b", - 
"ibm-granite/granite-20b-code-base-8k", - "GeorgiaTechResearchInstitute/starcoder-gpteacher-code-instruct", - "HuggingFaceH4/starchat-alpha", - "defog/sqlcoder2", - "HuggingFaceH4/starchat-beta", - "ibm-granite/granite-20b-code-instruct-8k" + "deepseek-ai/DeepSeek-V3.2", + "deepseek-ai/DeepSeek-V3.2-Exp", + "deepseek-ai/DeepSeek-V3.2-Speciale", + "deepseek-ai/DeepSeek-Math-V2", + "zianglih/DeepSeek-V3.2-6layer-last-1-bf16", + "exolabs/DeepSeek-V3.2_bf16", + "deepseek-ai/DeepSeek-V3.2-Exp-Base", + "cs2764/DeepSeek-V3.2_dq4-mlx", + "zianglih/DeepSeek-V3.2-MXFP8", + "hyper-accel/tiny-random-deepseek-v32" ], - "relevancy_score": 62.0 + "relevancy_score": 62.5 }, { - "architecture_id": "T5GemmaForConditionalGeneration", - "total_models": 14, - "total_downloads": 1037477, - "min_param_count": 312517632, + "architecture_id": "JambaForCausalLM", + "total_models": 28, + "total_downloads": 48565, + "min_param_count": 127679344, "sample_models": [ - "google/t5gemma-s-s-prefixlm", - "google/t5gemma-9b-9b-ul2", - "google/t5gemma-b-b-ul2", - "google/t5gemma-2b-2b-prefixlm", - "google/t5gemma-2b-2b-ul2", - "google/t5gemma-l-l-ul2-it", - "google/t5gemma-ml-ml-ul2-it", - "google/t5gemma-b-b-prefixlm", - "google/t5gemma-s-s-prefixlm-it", - "google/t5gemma-9b-9b-prefixlm" - ], - "relevancy_score": 60.3 - }, - { - "architecture_id": "XGLMForCausalLM", - "total_models": 18, - "total_downloads": 223424, - "min_param_count": 162256896, - "sample_models": [ - "facebook/xglm-564M", - "facebook/incoder-1B", - "facebook/xglm-7.5B", - "facebook/xglm-4.5B", - "facebook/xglm-1.7B", - "KoboldAI/fairseq-dense-2.7B", - "KoboldAI/fairseq-dense-125M", - "KoboldAI/fairseq-dense-355M", - "KoboldAI/fairseq-dense-13B", - "KoboldAI/fairseq-dense-1.3B" + "ai21labs/AI21-Jamba-Mini-1.5", + "ai21labs/Jamba-tiny-random", + "ai21labs/AI21-Jamba-Mini-1.6", + "ai21labs/AI21-Jamba-Large-1.5", + "ai21labs/AI21-Jamba2-3B", + "ai21labs/AI21-Jamba-Large-1.6", + "ai21labs/Jamba-v0.1", + "ai21labs/AI21-Jamba2-Mini", + 
"ai21labs/AI21-Jamba-Reasoning-3B", + "microsoft/Dayhoff-170M-GRS-112000" ], - "relevancy_score": 59.6 + "relevancy_score": 61.1 }, { "architecture_id": "Glm4MoeForCausalLM", - "total_models": 14, - "total_downloads": 742282, + "total_models": 16, + "total_downloads": 751068, "min_param_count": 2572352, "sample_models": [ "zai-org/GLM-4.5-Air", @@ -249,72 +173,55 @@ "zai-org/GLM-4.5", "zai-org/GLM-4.6", "np-cr/testing-glm4-moe", - "ArliAI/GLM-4.6-Derestricted-v3", "PrimeIntellect/GLM-0.5B", + "ArliAI/GLM-4.6-Derestricted-v3", "zai-org/GLM-4.5-Air-Base", "PrimeIntellect/INTELLECT-3" ], "relevancy_score": 59.6 }, { - "architecture_id": "JambaForCausalLM", - "total_models": 22, - "total_downloads": 44090, - "min_param_count": 127679344, - "sample_models": [ - "ai21labs/AI21-Jamba-Mini-1.5", - "ai21labs/Jamba-tiny-random", - "ai21labs/AI21-Jamba-Mini-1.6", - "ai21labs/AI21-Jamba-Large-1.5", - "ai21labs/AI21-Jamba-Large-1.6", - "ai21labs/AI21-Jamba2-3B", - "ai21labs/Jamba-v0.1", - "ai21labs/AI21-Jamba2-Mini", - "ai21labs/AI21-Jamba-Reasoning-3B", - "microsoft/Dayhoff-170m-GR" - ], - "relevancy_score": 58.6 - }, - { - "architecture_id": "DeepseekV32ForCausalLM", - "total_models": 8, - "total_downloads": 1446699, - "min_param_count": 136559748, + "architecture_id": "T5GemmaForConditionalGeneration", + "total_models": 14, + "total_downloads": 1062491, + "min_param_count": 312517632, "sample_models": [ - "deepseek-ai/DeepSeek-V3.2", - "deepseek-ai/DeepSeek-V3.2-Exp", - "deepseek-ai/DeepSeek-V3.2-Speciale", - "deepseek-ai/DeepSeek-Math-V2", - "exolabs/DeepSeek-V3.2_bf16", - "deepseek-ai/DeepSeek-V3.2-Exp-Base", - "hyper-accel/tiny-random-deepseek-v32", - "cs2764/DeepSeek-V3.2_dq4-mlx" + "google/t5gemma-s-s-prefixlm", + "google/t5gemma-b-b-ul2", + "google/t5gemma-9b-9b-ul2", + "google/t5gemma-2b-2b-prefixlm", + "google/t5gemma-2b-2b-ul2", + "google/t5gemma-l-l-ul2-it", + "google/t5gemma-ml-ml-ul2-it", + "google/t5gemma-b-b-prefixlm", + "google/t5gemma-s-s-prefixlm-it", + 
"google/t5gemma-9b-9b-prefixlm" ], - "relevancy_score": 57.0 + "relevancy_score": 59.1 }, { - "architecture_id": "BaichuanForCausalLM", - "total_models": 15, - "total_downloads": 115111, - "min_param_count": 16204352, + "architecture_id": "GPTBigCodeForCausalLM", + "total_models": 24, + "total_downloads": 39369, + "min_param_count": 1845928, "sample_models": [ - "baichuan-inc/Baichuan2-7B-Chat", - "baichuan-inc/Baichuan2-13B-Chat", - "baichuan-inc/Baichuan-13B-Chat", - "baichuan-inc/Baichuan2-7B-Base", - "baichuan-inc/Baichuan2-13B-Base", - "sakuraumi/Sakura-13B-Galgame", - "zxbsmk/NSFW_13B_sft", - "katuni4ka/tiny-random-baichuan2", - "baichuan-inc/Baichuan-13B-Base", - "FreedomIntelligence/HuatuoGPT2-7B" + "bigcode/starcoder", + "bigcode/starcoderbase-1b", + "ibm-granite/granite-20b-code-base-8k", + "GeorgiaTechResearchInstitute/starcoder-gpteacher-code-instruct", + "HuggingFaceH4/starchat-alpha", + "defog/sqlcoder2", + "ibm-granite/granite-20b-code-instruct-8k", + "HuggingFaceH4/starchat-beta", + "LoupGarou/WizardCoder-Guanaco-15B-V1.0", + "openchat/opencoderplus" ], - "relevancy_score": 56.1 + "relevancy_score": 58.1 }, { "architecture_id": "SmolLM3ForCausalLM", - "total_models": 7, - "total_downloads": 1155432, + "total_models": 8, + "total_downloads": 1123995, "min_param_count": 8245568, "sample_models": [ "HuggingFaceTB/SmolLM3-3B", @@ -322,15 +229,35 @@ "optimum-internal-testing/tiny-random-SmolLM3ForCausalLM", "unsloth/SmolLM3-3B", "onnx-internal-testing/tiny-random-SmolLM3ForCausalLM", - "MInAlA/smollm3-dpo-merged", - "N-Bot-Int/SmolSam3-MEMGRPO" + "MInAlA/SmolLM3-3B-DPO-merged", + "N-Bot-Int/SmolSam3-MEMGRPO", + "yujiepan/smollm3-tiny-random" + ], + "relevancy_score": 55.5 + }, + { + "architecture_id": "GlmMoeDsaForCausalLM", + "total_models": 10, + "total_downloads": 590748, + "min_param_count": 162774148, + "sample_models": [ + "zai-org/GLM-5", + "zai-org/GLM-5.1", + "cs2764/GLM-5-abliterated-dq4-mlx", + "livadies/GLM-5.1-Ghetto-MoE-2-Experts", + 
"unsloth/GLM-5", + "JANGQ-AI/GLM-5.1-JANG_2S", + "0xSero/GLM-5-REAP-381B", + "JANGQ-AI/GLM-5.1-JANG_1L", + "cs2764/GLM-5-abliterated-dq3-mlx", + "hyper-accel/tiny-random-glm-moe-dsa" ], - "relevancy_score": 55.9 + "relevancy_score": 55.3 }, { "architecture_id": "BartForConditionalGeneration", "total_models": 9, - "total_downloads": 599134, + "total_downloads": 692599, "min_param_count": 6044480, "sample_models": [ "KomeijiForce/bart-large-emojilm", @@ -343,12 +270,31 @@ "Tianlin668/MentalBART", "KomeijiForce/bart-large-emojilm-e2t" ], - "relevancy_score": 55.7 + "relevancy_score": 55.0 + }, + { + "architecture_id": "BaichuanForCausalLM", + "total_models": 15, + "total_downloads": 117761, + "min_param_count": 16204352, + "sample_models": [ + "baichuan-inc/Baichuan2-7B-Chat", + "baichuan-inc/Baichuan2-13B-Chat", + "baichuan-inc/Baichuan-13B-Chat", + "baichuan-inc/Baichuan2-7B-Base", + "baichuan-inc/Baichuan2-13B-Base", + "zxbsmk/NSFW_13B_sft", + "sakuraumi/Sakura-13B-Galgame", + "baichuan-inc/Baichuan-13B-Base", + "katuni4ka/tiny-random-baichuan2", + "FreedomIntelligence/HuatuoGPT2-7B" + ], + "relevancy_score": 54.9 }, { "architecture_id": "FalconH1ForCausalLM", "total_models": 15, - "total_downloads": 76731, + "total_downloads": 77408, "min_param_count": 91131072, "sample_models": [ "tiiuae/Falcon-H1-0.5B-Base", @@ -359,80 +305,45 @@ "tiiuae/Falcon-H1-1.5B-Base", "tiiuae/Falcon-H1-Tiny-90M-Instruct", "tiiuae/Falcon-H1R-7B", - "tiiuae/Falcon-H1-1.5B-Deep-Instruct", - "tiiuae/Falcon-H1-3B-Instruct" - ], - "relevancy_score": 55.2 - }, - { - "architecture_id": "CohereForCausalLM", - "total_models": 10, - "total_downloads": 193414, - "min_param_count": 2042176, - "sample_models": [ - "trl-internal-testing/tiny-CohereForCausalLM", - "CohereLabs/aya-expanse-8b", - "CohereLabs/c4ai-command-r-v01", - "CohereLabs/aya-23-8B", - "NLPark/AnFeng_v3_Avocet", - "CohereLabs/aya-expanse-32b", - "CohereLabs/aya-23-35B", - "CohereLabs/c4ai-command-r-plus-08-2024", - 
"CohereLabs/c4ai-command-r-08-2024", - "CohereLabs/c4ai-command-r-plus" + "tiiuae/Falcon-H1-3B-Instruct", + "tiiuae/Falcon-H1-1.5B-Deep-Instruct" ], - "relevancy_score": 53.9 + "relevancy_score": 54.0 }, { "architecture_id": "H2OVLChatModel", "total_models": 2, - "total_downloads": 2131755, + "total_downloads": 2009160, "min_param_count": 826295808, "sample_models": [ "h2oai/h2ovl-mississippi-800m", "h2oai/h2ovl-mississippi-2b" ], - "relevancy_score": 53.9 - }, - { - "architecture_id": "MiniCPMForCausalLM", - "total_models": 12, - "total_downloads": 93202, - "min_param_count": 80000640, - "sample_models": [ - "openbmb/MiniCPM-2B-sft-bf16", - "openbmb/MiniCPM4.1-8B", - "openbmb/MiniCPM-1B-sft-bf16", - "openbmb/MiniCPM4-0.5B", - "openbmb/MiniCPM-MoE-8x2B", - "openbmb/MiniCPM-S-1B-sft", - "katuni4ka/tiny-random-minicpm", - "openbmb/MiniCPM4-8B", - "openbmb/MiniCPM-2B-sft-fp32", - "openbmb/MiniCPM-2B-dpo-bf16" - ], - "relevancy_score": 53.6 + "relevancy_score": 53.0 }, { - "architecture_id": "GlmMoeDsaForCausalLM", - "total_models": 7, - "total_downloads": 411962, - "min_param_count": 162774148, + "architecture_id": "DFlashDraftModel", + "total_models": 11, + "total_downloads": 131573, + "min_param_count": 473995264, "sample_models": [ - "zai-org/GLM-5", - "zai-org/GLM-5.1", - "cs2764/GLM-5-abliterated-dq4-mlx", - "0xSero/GLM-5-REAP-381B", - "unsloth/GLM-5", - "cs2764/GLM-5-abliterated-dq3-mlx", - "hyper-accel/tiny-random-glm-moe-dsa" + "z-lab/Qwen3-4B-DFlash-b16", + "z-lab/Qwen3-8B-DFlash-b16", + "z-lab/Qwen3.5-27B-DFlash", + "z-lab/Qwen3.5-9B-DFlash", + "z-lab/Qwen3.5-4B-DFlash", + "z-lab/Qwen3.5-35B-A3B-DFlash", + "z-lab/gpt-oss-120b-DFlash", + "z-lab/gpt-oss-20b-DFlash", + "z-lab/Qwen3-Coder-30B-A3B-DFlash", + "z-lab/LLaMA3.1-8B-Instruct-DFlash-UltraChat" ], - "relevancy_score": 53.6 + "relevancy_score": 52.6 }, { "architecture_id": "Cohere2ForCausalLM", "total_models": 9, - "total_downloads": 207703, + "total_downloads": 220158, "min_param_count": 2090024, 
"sample_models": [ "trl-internal-testing/tiny-Cohere2ForCausalLM", @@ -445,12 +356,12 @@ "CohereLabs/tiny-aya-fire", "CohereLabs/tiny-aya-earth" ], - "relevancy_score": 53.4 + "relevancy_score": 52.5 }, { "architecture_id": "PhiMoEForCausalLM", "total_models": 4, - "total_downloads": 889098, + "total_downloads": 902073, "min_param_count": 1110112, "sample_models": [ "microsoft/Phi-tiny-MoE-instruct", @@ -458,37 +369,37 @@ "microsoft/Phi-3.5-MoE-instruct", "optimum-intel-internal-testing/phi-3.5-moe-tiny-random" ], - "relevancy_score": 53.3 + "relevancy_score": 52.5 }, { - "architecture_id": "MPTForCausalLM", - "total_models": 26, - "total_downloads": 41278, - "min_param_count": 6649286656, + "architecture_id": "MiniCPMForCausalLM", + "total_models": 12, + "total_downloads": 90418, + "min_param_count": 80000640, "sample_models": [ - "vinai/PhoGPT-4B-Chat", - "vinai/PhoGPT-4B", - "anas-awadalla/mpt-7b", - "gl198976/mpt-7b-instruct", - "replit/replit-code-v1-3b", - "echarlaix/tiny-mpt-random-remote-code", - "wtang06/mpt-125m-c4", - "lightblue/japanese-mpt-7b", - "gl198976/mpt-7b", - "TehVenom/MPT-7b-Chat-Instruct-LongCTX-Merge" + "openbmb/MiniCPM-2B-sft-bf16", + "openbmb/MiniCPM4.1-8B", + "openbmb/MiniCPM-1B-sft-bf16", + "openbmb/MiniCPM4-0.5B", + "openbmb/MiniCPM-MoE-8x2B", + "openbmb/MiniCPM-S-1B-sft", + "openbmb/MiniCPM4-8B", + "katuni4ka/tiny-random-minicpm", + "openbmb/MiniCPM-2B-dpo-bf16", + "openbmb/MiniCPM-2B-sft-fp32" ], - "relevancy_score": 53.2 + "relevancy_score": 52.4 }, { "architecture_id": "RwkvForCausalLM", "total_models": 15, - "total_downloads": 31498, + "total_downloads": 29790, "min_param_count": 169342464, "sample_models": [ "RWKV/v5-Eagle-7B-HF", "RWKV/rwkv-4-169m-pile", - "beomi/KoRWKV-6B", "RWKV/rwkv-4-430m-pile", + "beomi/KoRWKV-6B", "RWKV/rwkv-4-1b5-pile", "RWKV/rwkv-4-3b-pile", "RWKV/rwkv-raven-1b5", @@ -496,12 +407,12 @@ "RWKV/rwkv-raven-3b", "RWKV/rwkv-raven-14b" ], - "relevancy_score": 53.2 + "relevancy_score": 51.9 }, { 
"architecture_id": "MT5ForConditionalGeneration", "total_models": 13, - "total_downloads": 55149, + "total_downloads": 51271, "min_param_count": 300176768, "sample_models": [ "knowledgator/IUPAC2SMILES-canonical-base", @@ -515,47 +426,43 @@ "intelia-lab-uah/mt0-base_QG_SQAC", "UBC-NLP/toucan-1.2B" ], - "relevancy_score": 53.1 - }, - { - "architecture_id": "DFlashDraftModel", - "total_models": 10, - "total_downloads": 128716, - "min_param_count": 473995264, - "sample_models": [ - "z-lab/Qwen3-4B-DFlash-b16", - "z-lab/Qwen3-8B-DFlash-b16", - "z-lab/Qwen3.5-9B-DFlash", - "z-lab/Qwen3.5-27B-DFlash", - "z-lab/Qwen3.5-4B-DFlash", - "z-lab/gpt-oss-120b-DFlash", - "z-lab/gpt-oss-20b-DFlash", - "z-lab/Qwen3.5-35B-A3B-DFlash", - "z-lab/LLaMA3.1-8B-Instruct-DFlash-UltraChat", - "z-lab/Qwen3-Coder-30B-A3B-DFlash" - ], - "relevancy_score": 53.0 + "relevancy_score": 51.8 }, { "architecture_id": "Qwen2MoeForCausalLM", "total_models": 7, - "total_downloads": 203653, + "total_downloads": 193536, "min_param_count": 1219036, "sample_models": [ "Qwen/Qwen1.5-MoE-A2.7B", "Qwen/Qwen1.5-MoE-A2.7B-Chat", "Qwen/Qwen2-57B-A14B-Instruct", "Qwen/Qwen2-57B-A14B", - "katuni4ka/tiny-random-qwen1.5-moe", "yujiepan/qwen1.5-moe-tiny-random", + "katuni4ka/tiny-random-qwen1.5-moe", "xd2010/Qwen1.5-MOE-sft-math7k-densemixer" ], - "relevancy_score": 52.0 + "relevancy_score": 51.0 + }, + { + "architecture_id": "FalconMambaForCausalLM", + "total_models": 6, + "total_downloads": 194376, + "min_param_count": 525400, + "sample_models": [ + "trl-internal-testing/tiny-FalconMambaForCausalLM", + "tiiuae/falcon-mamba-7b-instruct", + "tiiuae/falcon-mamba-7b", + "tiiuae/falcon-mamba-tiny-dev", + "tiiuae/Falcon3-Mamba-7B-Instruct", + "tiiuae/Falcon3-Mamba-7B-Base" + ], + "relevancy_score": 50.4 }, { "architecture_id": "Phi3VForCausalLM", "total_models": 6, - "total_downloads": 174972, + "total_downloads": 173011, "min_param_count": 304612720, "sample_models": [ "microsoft/Phi-3-vision-128k-instruct", @@ -565,12 
+472,12 @@ "Desm0nt/Phi-3-HornyVision-128k-instruct", "failspy/Phi-3-vision-128k-instruct-abliterated-alpha" ], - "relevancy_score": 51.0 + "relevancy_score": 50.1 }, { "architecture_id": "ExaoneForCausalLM", "total_models": 7, - "total_downloads": 660526, + "total_downloads": 626575, "min_param_count": 2405327360, "sample_models": [ "LGAI-EXAONE/EXAONE-Deep-7.8B", @@ -581,26 +488,31 @@ "LGAI-EXAONE/EXAONE-Deep-32B", "LGAI-EXAONE/EXAONE-Deep-2.4B" ], - "relevancy_score": 50.6 + "relevancy_score": 49.5 }, { - "architecture_id": "FalconMambaForCausalLM", - "total_models": 5, - "total_downloads": 186669, - "min_param_count": 525400, + "architecture_id": "Glm4ForCausalLM", + "total_models": 10, + "total_downloads": 32445, + "min_param_count": 4854928, "sample_models": [ - "trl-internal-testing/tiny-FalconMambaForCausalLM", - "tiiuae/falcon-mamba-7b-instruct", - "tiiuae/falcon-mamba-7b", - "tiiuae/falcon-mamba-tiny-dev", - "tiiuae/Falcon3-Mamba-7B-Instruct" + "zai-org/GLM-4-9B-0414", + "zai-org/GLM-4-32B-0414", + "zai-org/GLM-Z1-9B-0414", + "MCult01/glm-muse-v2", + "zai-org/GLM-Z1-32B-0414", + "MCult01/glm-muse-v1", + "zai-org/GLM-4-32B-Base-0414", + "yujiepan/glm-4-tiny-random", + "llmfan46/GLM-4-32B-0414-uncensored-heretic-v1", + "ccui46/cookingworld_per_chunk_act_glm_tokfix_diffPrompt_5000" ], - "relevancy_score": 50.5 + "relevancy_score": 49.0 }, { "architecture_id": "LlavaQwenForCausalLM", "total_models": 4, - "total_downloads": 165137, + "total_downloads": 186477, "min_param_count": 893618208, "sample_models": [ "lmms-lab/llava-onevision-qwen2-7b-ov", @@ -608,24 +520,62 @@ "lmms-lab/llava-onevision-qwen2-7b-si", "lmms-lab/llava-onevision-qwen2-0.5b-si" ], - "relevancy_score": 49.5 + "relevancy_score": 49.0 + }, + { + "architecture_id": "MiniMaxM2ForCausalLM", + "total_models": 23, + "total_downloads": 1143531, + "min_param_count": 18581099008, + "sample_models": [ + "MiniMaxAI/MiniMax-M2.5", + "cerebras/MiniMax-M2.1-REAP-139B-A10B", + "MiniMaxAI/MiniMax-M2", + 
"MiniMaxAI/MiniMax-M2.7", + "MiniMaxAI/MiniMax-M2.1", + "cerebras/MiniMax-M2.5-REAP-139B-A10B", + "JANGQ-AI/MiniMax-M2.7-JANG_2L", + "aspctu/MiniMax-M2.5", + "JANGQ-AI/MiniMax-M2.7-JANG_3L", + "dealignai/MiniMax-M2.5-UNCENSORED-JANG_2L" + ], + "relevancy_score": 48.9 + }, + { + "architecture_id": "LlamaForCausalLMEagle3", + "total_models": 11, + "total_downloads": 22792, + "min_param_count": 145422848, + "sample_models": [ + "taobao-mnn/Qwen3-VL-8B-Instruct-Eagle3", + "nvidia/gpt-oss-120b-Eagle3-long-context", + "chankhavu/c2.eagle3-test", + "Zjcxy-SmartAI/Eagle3-Qwen3-32B-zh", + "nvidia/gpt-oss-120b-Eagle3-short-context", + "Zjcxy-SmartAI/Eagle3-Qwen3-8B-zh", + "nvidia/gpt-oss-120b-Eagle3-throughput", + "thoughtworks/Gemma-4-31B-Eagle3", + "ruipeterpan/Qwen2.5-7B-Instruct_EAGLE3_UltraChat", + "thoughtworks/MiniMax-M2.5-Eagle3" + ], + "relevancy_score": 48.8 }, { "architecture_id": "BambaForCausalLM", "total_models": 3, - "total_downloads": 224342, + "total_downloads": 225000, "min_param_count": 33110760, "sample_models": [ "hmellor/tiny-random-BambaForCausalLM", "ibm-ai-platform/Bamba-9B-v1", "ibm-ai-platform/Bamba-9B-v2" ], - "relevancy_score": 49.5 + "relevancy_score": 48.8 }, { "architecture_id": "Eagle3Speculator", "total_models": 5, - "total_downloads": 105711, + "total_downloads": 104424, "min_param_count": 950186496, "sample_models": [ "RedHatAI/Qwen3-8B-speculator.eagle3", @@ -634,41 +584,41 @@ "RedHatAI/Qwen3-32B-speculator.eagle3", "RedHatAI/Qwen3-14B-speculator.eagle3" ], - "relevancy_score": 49.2 + "relevancy_score": 48.4 }, { "architecture_id": "OpenAIGPTLMHeadModel", "total_models": 2, - "total_downloads": 236281, + "total_downloads": 230174, "min_param_count": 119680512, "sample_models": [ "openai-community/openai-gpt", "lgaalves/gpt1" ], - "relevancy_score": 49.0 + "relevancy_score": 48.2 }, { "architecture_id": "HunYuanDenseV1ForCausalLM", "total_models": 9, - "total_downloads": 28409, + "total_downloads": 28771, "min_param_count": 539010048, 
"sample_models": [ "tencent/Hunyuan-7B-Instruct", "tencent/Hunyuan-0.5B-Pretrain", "tencent/Hunyuan-1.8B-Pretrain", - "tencent/Hunyuan-4B-Pretrain", "tencent/Hunyuan-7B-Instruct-0124", + "tencent/Hunyuan-4B-Pretrain", "tencent/Hunyuan-7B-Pretrain", "tencent/Hunyuan-1.8B-Instruct", "tencent/Hunyuan-0.5B-Instruct", "tencent/Hunyuan-4B-Instruct" ], - "relevancy_score": 48.9 + "relevancy_score": 48.1 }, { "architecture_id": "BloomModel", "total_models": 8, - "total_downloads": 38422, + "total_downloads": 39579, "min_param_count": 16156544, "sample_models": [ "bigscience/bigscience-small-testing", @@ -680,30 +630,12 @@ "Muennighoff/bloom-tiny-random", "TurkuNLP/gpt3-finnish-xl" ], - "relevancy_score": 48.9 - }, - { - "architecture_id": "LlamaForCausalLMEagle3", - "total_models": 9, - "total_downloads": 20292, - "min_param_count": 208676608, - "sample_models": [ - "taobao-mnn/Qwen3-VL-8B-Instruct-Eagle3", - "nvidia/gpt-oss-120b-Eagle3-long-context", - "nvidia/gpt-oss-120b-Eagle3-short-context", - "Zjcxy-SmartAI/Eagle3-Qwen3-32B-zh", - "chankhavu/c2.eagle3-test", - "Zjcxy-SmartAI/Eagle3-Qwen3-8B-zh", - "nvidia/gpt-oss-120b-Eagle3-throughput", - "thoughtworks/Gemma-4-31B-Eagle3", - "ruipeterpan/Qwen2.5-7B-Instruct_EAGLE3_UltraChat" - ], - "relevancy_score": 48.2 + "relevancy_score": 48.1 }, { "architecture_id": "NemotronForCausalLM", "total_models": 5, - "total_downloads": 59740, + "total_downloads": 63951, "min_param_count": 2150720, "sample_models": [ "nvidia/Nemotron-Mini-4B-Instruct", @@ -712,89 +644,57 @@ "badaoui/tiny-random-NemotronForCausalLM", "thhaus/nemotron3-8b" ], - "relevancy_score": 47.9 - }, - { - "architecture_id": "Glm4ForCausalLM", - "total_models": 7, - "total_downloads": 30432, - "min_param_count": 4854928, - "sample_models": [ - "zai-org/GLM-4-9B-0414", - "zai-org/GLM-Z1-32B-0414", - "zai-org/GLM-Z1-9B-0414", - "zai-org/GLM-4-32B-0414", - "zai-org/GLM-4-32B-Base-0414", - "llmfan46/GLM-4-32B-0414-uncensored-heretic-v1", - "yujiepan/glm-4-tiny-random" - 
], - "relevancy_score": 47.7 + "relevancy_score": 47.3 }, { "architecture_id": "HyenaDNAForCausalLM", "total_models": 6, - "total_downloads": 38899, + "total_downloads": 38536, "min_param_count": 450712, "sample_models": [ "LongSafari/hyenadna-small-32k-seqlen-hf", "LongSafari/hyenadna-medium-450k-seqlen-hf", - "LongSafari/hyenadna-large-1m-seqlen-hf", "LongSafari/hyenadna-tiny-1k-seqlen-hf", + "LongSafari/hyenadna-large-1m-seqlen-hf", "LongSafari/hyenadna-medium-160k-seqlen-hf", "LongSafari/hyenadna-tiny-16k-seqlen-d128-hf" ], - "relevancy_score": 47.6 - }, - { - "architecture_id": "ProGenForCausalLM", - "total_models": 5, - "total_downloads": 47595, - "min_param_count": 151148576, - "sample_models": [ - "hugohrban/progen2-base", - "hugohrban/progen2-small", - "hugohrban/progen2-medium", - "hugohrban/progen2-large", - "hugohrban/progen2-small-mix7" - ], - "relevancy_score": 47.4 - }, - { - "architecture_id": "Eagle3DraftModel", - "total_models": 7, - "total_downloads": 24688, - "min_param_count": 522152832, - "sample_models": [ - "RedHatAI/gpt-oss-20b-speculator.eagle3", - "RedHatAI/gpt-oss-120b-speculator.eagle3", - "RedHatAI/Qwen3-30B-A3B-Thinking-2507-speculator.eagle3", - "RedHatAI/Qwen3-235B-A22B-Instruct-2507-speculator.eagle3", - "RedHatAI/Qwen3-30B-A3B-Instruct-2507-speculator.eagle3", - "RedHatAI/Qwen3-30B-A3B-speculator.eagle3", - "RedHatAI/Qwen3-32B-Thinking-speculator.eagle3" - ], - "relevancy_score": 47.3 + "relevancy_score": 46.8 }, { "architecture_id": "T5WithLMHeadModel", "total_models": 7, - "total_downloads": 25117, + "total_downloads": 25552, "min_param_count": 222903936, "sample_models": [ "unicamp-dl/ptt5-base-portuguese-vocab", "Salesforce/codet5-large", "Salesforce/codet5-large-ntp-py", "Rostlab/prot_t5_xl_bfd", - "unicamp-dl/ptt5-small-portuguese-vocab", "gagan3012/k2t", + "unicamp-dl/ptt5-small-portuguese-vocab", "unicamp-dl/ptt5-large-portuguese-vocab" ], - "relevancy_score": 47.3 + "relevancy_score": 46.6 + }, + { + "architecture_id": 
"ProGenForCausalLM", + "total_models": 5, + "total_downloads": 46959, + "min_param_count": 151148576, + "sample_models": [ + "hugohrban/progen2-base", + "hugohrban/progen2-small", + "hugohrban/progen2-medium", + "hugohrban/progen2-large", + "hugohrban/progen2-small-mix7" + ], + "relevancy_score": 46.6 }, { "architecture_id": "Zamba2ForCausalLM", - "total_models": 7, - "total_downloads": 111102, + "total_models": 8, + "total_downloads": 111280, "min_param_count": 1215064704, "sample_models": [ "Zyphra/Zamba2-1.2B-instruct", @@ -803,14 +703,63 @@ "EchoLabs33/zamba2-1.2b-hxq", "Zyphra/Zamba2-2.7B-instruct", "EchoLabs33/zamba2-2.7b-instruct-hxq", - "EchoLabs33/zamba2-7b-instruct-hxq" + "EchoLabs33/zamba2-7b-instruct-hxq", + "Zyphra/Zamba2-2.7B-Instruct-v2" ], - "relevancy_score": 46.6 + "relevancy_score": 46.4 + }, + { + "architecture_id": "Qwen3_5MoeForConditionalGeneration", + "total_models": 16, + "total_downloads": 65079, + "min_param_count": 5555793776, + "sample_models": [ + "Jackrong/Qwen3.5-35B-A3B-Claude-4.6-Opus-Reasoning-Distilled", + "caiovicentino1/Qwopus-MoE-35B-A3B-HLWQ-Q5", + "nightmedia/Qwen3.5-35B-A3B-Text-qx64-hi-mlx", + "Brooooooklyn/Qwen3.5-35B-A3B-UD-Q4_K_XL-mlx", + "nivvis/Qwen3.5-35B-A3B-EQ-v5", + "JANGQ-AI/Qwen3.5-397B-A17B-JANG_1L", + "Jackrong/MLX-Qwen3.5-35B-A3B-Claude-4.6-Opus-Reasoning-Distilled-bf16", + "nightmedia/Qwen3.5-122B-A10B-Text-qx85-mlx", + "JANGQ-AI/Qwen3.5-397B-A17B-JANG_2L", + "Kevletesteur/Qwen3.5-35B-A3B-Chimere-Distilled-BF16" + ], + "relevancy_score": 46.2 + }, + { + "architecture_id": "Ernie4_5_MoeForCausalLM", + "total_models": 5, + "total_downloads": 38765, + "min_param_count": 904040, + "sample_models": [ + "baidu/ERNIE-4.5-21B-A3B-PT", + "baidu/ERNIE-4.5-21B-A3B-Base-PT", + "baidu/ERNIE-4.5-21B-A3B-Thinking", + "yujiepan/ernie-4.5-moe-tiny-random", + "baidu/ERNIE-4.5-300B-A47B-PT" + ], + "relevancy_score": 46.2 + }, + { + "architecture_id": "Eagle3DraftModel", + "total_models": 6, + "total_downloads": 24433, + 
"min_param_count": 522152832, + "sample_models": [ + "RedHatAI/gpt-oss-20b-speculator.eagle3", + "RedHatAI/gpt-oss-120b-speculator.eagle3", + "RedHatAI/Qwen3-30B-A3B-Thinking-2507-speculator.eagle3", + "RedHatAI/Qwen3-235B-A22B-Instruct-2507-speculator.eagle3", + "RedHatAI/Qwen3-30B-A3B-Instruct-2507-speculator.eagle3", + "RedHatAI/Qwen3-30B-A3B-speculator.eagle3" + ], + "relevancy_score": 45.8 }, { "architecture_id": "AquilaForCausalLM", "total_models": 7, - "total_downloads": 17937, + "total_downloads": 17374, "min_param_count": 6425376, "sample_models": [ "BAAI/AquilaChat2-7B", @@ -821,28 +770,12 @@ "BAAI/AquilaChat2-34B-16K", "BAAI/Aquila2-70B-Expr" ], - "relevancy_score": 46.6 - }, - { - "architecture_id": "XverseForCausalLM", - "total_models": 7, - "total_downloads": 15816, - "min_param_count": 6459056, - "sample_models": [ - "xverse/XVERSE-7B-Chat", - "katuni4ka/tiny-random-xverse", - "xverse/XVERSE-13B-256K", - "xverse/XVERSE-13B", - "xverse/XVERSE-65B-Chat", - "xverse/XVERSE-13B-Chat", - "xverse/XVERSE-7B" - ], - "relevancy_score": 46.3 + "relevancy_score": 45.7 }, { "architecture_id": "ArceeForCausalLM", "total_models": 4, - "total_downloads": 36482, + "total_downloads": 37111, "min_param_count": 4129088, "sample_models": [ "arcee-ai/AFM-4.5B-Base", @@ -850,31 +783,28 @@ "onnx-internal-testing/tiny-random-ArceeForCausalLM", "arcee-ai/AFM-4.5B" ], - "relevancy_score": 46.1 + "relevancy_score": 45.5 }, { - "architecture_id": "Qwen3_5MoeForConditionalGeneration", - "total_models": 15, - "total_downloads": 45472, - "min_param_count": 6643527536, + "architecture_id": "XverseForCausalLM", + "total_models": 7, + "total_downloads": 15400, + "min_param_count": 6459056, "sample_models": [ - "Jackrong/Qwen3.5-35B-A3B-Claude-4.6-Opus-Reasoning-Distilled", - "nightmedia/Qwen3.5-35B-A3B-Text-qx64-hi-mlx", - "Jackrong/MLX-Qwen3.5-35B-A3B-Claude-4.6-Opus-Reasoning-Distilled-bf16", - "caiovicentino1/Qwopus-MoE-35B-A3B-PolarQuant-Q5", - "nivvis/Qwen3.5-35B-A3B-EQ-v5", - 
"Brooooooklyn/Qwen3.5-35B-A3B-UD-Q4_K_XL-mlx", - "JANGQ-AI/Qwen3.5-397B-A17B-JANG_1L", - "nightmedia/Qwen3.5-122B-A10B-Text-qx85-mlx", - "JANGQ-AI/Qwen3.5-397B-A17B-JANG_2L", - "Kevletesteur/Qwen3.5-35B-A3B-Chimere-Distilled-BF16" + "xverse/XVERSE-7B-Chat", + "katuni4ka/tiny-random-xverse", + "xverse/XVERSE-13B-256K", + "xverse/XVERSE-13B", + "xverse/XVERSE-65B-Chat", + "xverse/XVERSE-13B-Chat", + "xverse/XVERSE-7B" ], - "relevancy_score": 46.0 + "relevancy_score": 45.4 }, { "architecture_id": "LlavaQwen2ForCausalLM", "total_models": 5, - "total_downloads": 25580, + "total_downloads": 23815, "min_param_count": 758833760, "sample_models": [ "qnguyen3/nanoLLaVA", @@ -883,12 +813,26 @@ "apple/FastVLM-7B", "FreedomIntelligence/HuatuoGPT-Vision-7B" ], - "relevancy_score": 46.0 + "relevancy_score": 45.2 + }, + { + "architecture_id": "Llama4ForCausalLM", + "total_models": 5, + "total_downloads": 22089, + "min_param_count": 3269144, + "sample_models": [ + "trl-internal-testing/tiny-Llama4ForCausalLM", + "pruna-test/test-save-tiny-random-llama4-smashed", + "facebook/MobileLLM-R1.5-360M", + "facebook/MobileLLM-R1-950M", + "facebook/MobileLLM-R1-140M" + ], + "relevancy_score": 45.0 }, { "architecture_id": "SDARForCausalLM", "total_models": 6, - "total_downloads": 96427, + "total_downloads": 93909, "min_param_count": 2031739904, "sample_models": [ "JetLM/SDAR-1.7B-Chat", @@ -898,43 +842,25 @@ "JetLM/SDAR-4B-Chat", "JetLM/SDAR-4B-Chat-b32" ], - "relevancy_score": 45.6 + "relevancy_score": 44.8 }, { - "architecture_id": "DeepseekV2ForCausalLM", - "total_models": 14, - "total_downloads": 1552347, - "min_param_count": 15706484224, + "architecture_id": "SeedOssForCausalLM", + "total_models": 4, + "total_downloads": 25731, + "min_param_count": 2497064, "sample_models": [ - "deepseek-ai/DeepSeek-V2-Lite-Chat", - "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct", - "deepseek-ai/DeepSeek-V2-Lite", - "deepseek-ai/DeepSeek-V2", - "deepseek-ai/DeepSeek-V2-Chat", - "deepseek-ai/DeepSeek-V2.5", 
- "deepseek-ai/DeepSeek-Coder-V2-Instruct", - "deepseek-ai/DeepSeek-V2-Chat-0628", - "deepseek-ai/DeepSeek-Coder-V2-Lite-Base", - "Kwaipilot/KwaiCoder-DS-V2-Lite-Base" - ], - "relevancy_score": 45.2 - }, - { - "architecture_id": "BitNetForCausalLM", - "total_models": 3, - "total_downloads": 25988, - "min_param_count": 849787090, - "sample_models": [ - "microsoft/bitnet-b1.58-2B-4T", - "microsoft/bitnet-b1.58-2B-4T-bf16", - "iSolver-AI/FEnet" + "ByteDance-Seed/Seed-OSS-36B-Instruct", + "NousResearch/Hermes-4.3-36B", + "ByteDance-Seed/Seed-OSS-36B-Base", + "yujiepan/seed-oss-tiny-random" ], "relevancy_score": 44.7 }, { "architecture_id": "PldrllmForCausalLM", "total_models": 5, - "total_downloads": 13743, + "total_downloads": 14045, "min_param_count": 109689362, "sample_models": [ "fromthesky/PLDR-LLM-v51-SOC-110M-5", @@ -943,31 +869,43 @@ "fromthesky/PLDR-LLM-v51-SOC-110M-3", "fromthesky/PLDR-LLM-v51-SOC-110M-1" ], - "relevancy_score": 44.6 + "relevancy_score": 44.0 }, { - "architecture_id": "DeciLMForCausalLM", - "total_models": 13, - "total_downloads": 256626, - "min_param_count": 7043551232, + "architecture_id": "DeepseekV2ForCausalLM", + "total_models": 14, + "total_downloads": 1547180, + "min_param_count": 15706484224, "sample_models": [ - "nvidia/Llama-3_3-Nemotron-Super-49B-v1_5", - "nvidia/Llama-3_3-Nemotron-Super-49B-v1", - "nvidia/Llama-3_1-Nemotron-Ultra-253B-v1", - "ConicCat/Llama3_3-Nemo-Super-Writer-49B", - "nvidia/Llama-3_1-Nemotron-51B-Instruct", - "FriendliAI/Llama-3_3-Nemotron-Super-49B-v1_5", - "FriendliAI/Llama-3_1-Nemotron-Ultra-253B-v1", - "nvidia/Llama-3_1-Nemotron-Ultra-253B-CPT-v1", - "NewstaR/Porpoise-6b-instruct", - "Danielbrdz/Barcenas-6b" + "deepseek-ai/DeepSeek-V2-Lite-Chat", + "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct", + "deepseek-ai/DeepSeek-V2-Lite", + "deepseek-ai/DeepSeek-V2", + "deepseek-ai/DeepSeek-V2-Chat", + "deepseek-ai/DeepSeek-V2.5", + "deepseek-ai/DeepSeek-Coder-V2-Instruct", + "deepseek-ai/DeepSeek-V2-Chat-0628", + 
"deepseek-ai/DeepSeek-Coder-V2-Lite-Base", + "Kwaipilot/KwaiCoder-DS-V2-Lite-Base" + ], + "relevancy_score": 43.9 + }, + { + "architecture_id": "BitNetForCausalLM", + "total_models": 3, + "total_downloads": 23875, + "min_param_count": 849787090, + "sample_models": [ + "microsoft/bitnet-b1.58-2B-4T", + "microsoft/bitnet-b1.58-2B-4T-bf16", + "iSolver-AI/FEnet" ], - "relevancy_score": 44.5 + "relevancy_score": 43.9 }, { "architecture_id": "MoAMetricLM", "total_models": 5, - "total_downloads": 12168, + "total_downloads": 12515, "min_param_count": 69130608, "sample_models": [ "reaperdoesntknow/MoA-150M", @@ -976,24 +914,50 @@ "reaperdoesntknow/MoA-100M", "reaperdoesntknow/DiscoverLM-70M" ], - "relevancy_score": 44.4 + "relevancy_score": 43.7 }, { - "architecture_id": "Llama4ForCausalLM", - "total_models": 3, - "total_downloads": 20274, - "min_param_count": 3269144, + "architecture_id": "LlavaLlamaForCausalLM", + "total_models": 18, + "total_downloads": 64859, + "min_param_count": 7466764288, "sample_models": [ - "trl-internal-testing/tiny-Llama4ForCausalLM", - "pruna-test/test-save-tiny-random-llama4-smashed", - "facebook/MobileLLM-R1.5-360M" + "wisdomik/Quilt-Llava-v1.5-7b", + "LanguageBind/Video-LLaVA-7B", + "liuhaotian/llava-llama-2-13b-chat-lightning-preview", + "mmaaz60/LLaVA-7B-Lightening-v1-1", + "lmms-lab/llama3-llava-next-8b", + "microsoft/llava-med-7b-delta", + "deepcs233/VisCoT-7b-336", + "ManishThota/Ollama_Video_llama_7B", + "liuhaotian/LLaVA-Lightning-7B-delta-v1-1", + "EricPolaris/Quilt-Llava-v1.5-7b" + ], + "relevancy_score": 43.5 + }, + { + "architecture_id": "DeciLMForCausalLM", + "total_models": 13, + "total_downloads": 263391, + "min_param_count": 7043551232, + "sample_models": [ + "nvidia/Llama-3_3-Nemotron-Super-49B-v1_5", + "nvidia/Llama-3_3-Nemotron-Super-49B-v1", + "nvidia/Llama-3_1-Nemotron-Ultra-253B-v1", + "ConicCat/Llama3_3-Nemo-Super-Writer-49B", + "nvidia/Llama-3_1-Nemotron-51B-Instruct", + "FriendliAI/Llama-3_3-Nemotron-Super-49B-v1_5", + 
"FriendliAI/Llama-3_1-Nemotron-Ultra-253B-v1", + "nvidia/Llama-3_1-Nemotron-Ultra-253B-CPT-v1", + "NewstaR/Porpoise-6b-instruct", + "Danielbrdz/Barcenas-6b" ], - "relevancy_score": 44.2 + "relevancy_score": 43.4 }, { "architecture_id": "MBartForConditionalGeneration", "total_models": 6, - "total_downloads": 7379, + "total_downloads": 7575, "min_param_count": 379691717, "sample_models": [ "Pravopysnyk/best-unlp", @@ -1003,234 +967,221 @@ "MRNH/mbart-german-grammar-corrector", "MRNH/mbart-russian-grammar-corrector" ], - "relevancy_score": 43.9 + "relevancy_score": 43.3 }, { "architecture_id": "DogeForCausalLM", "total_models": 6, - "total_downloads": 7207, + "total_downloads": 7541, "min_param_count": 13118728, "sample_models": [ "SmallDoge/Doge-320M", - "SmallDoge/Doge-20M", "SmallDoge/Doge-160M", + "SmallDoge/Doge-20M", "SmallDoge/Doge-60M", "SmallDoge/Doge-120M-MoE", "SmallDoge/Doge-20M-MoE" ], - "relevancy_score": 43.9 + "relevancy_score": 43.3 }, { "architecture_id": "NemotronFlashForCausalLM", "total_models": 2, - "total_downloads": 21466, + "total_downloads": 23953, "min_param_count": 965389440, "sample_models": [ - "nvidia/Nemotron-Flash-3B", - "nvidia/Nemotron-Flash-1B" + "nvidia/Nemotron-Flash-1B", + "nvidia/Nemotron-Flash-3B" ], - "relevancy_score": 43.6 - }, - { - "architecture_id": "LlavaLlamaForCausalLM", - "total_models": 18, - "total_downloads": 33654, - "min_param_count": 7466764288, - "sample_models": [ - "LanguageBind/Video-LLaVA-7B", - "wisdomik/Quilt-Llava-v1.5-7b", - "liuhaotian/llava-llama-2-13b-chat-lightning-preview", - "lmms-lab/llama3-llava-next-8b", - "mmaaz60/LLaVA-7B-Lightening-v1-1", - "microsoft/llava-med-7b-delta", - "deepcs233/VisCoT-7b-336", - "ManishThota/Ollama_Video_llama_7B", - "liuhaotian/LLaVA-Lightning-7B-delta-v1-1", - "EricPolaris/Quilt-Llava-v1.5-7b" - ], - "relevancy_score": 43.4 + "relevancy_score": 43.3 }, { - "architecture_id": "Exaone4ForCausalLM", + "architecture_id": "EchoForCausalLM", "total_models": 3, - 
"total_downloads": 86753, - "min_param_count": 1279391488, + "total_downloads": 15499, + "min_param_count": 114687488, "sample_models": [ - "LGAI-EXAONE/EXAONE-4.0-1.2B", - "LGAI-EXAONE/EXAONE-4.0.1-32B", - "LGAI-EXAONE/EXAONE-4.0-32B" + "ethicalabs/Echo-DSRN-486M-v0.7.6-SFT", + "ethicalabs/Echo-DSRN-114M", + "ethicalabs/Echo-DSRN-114M-Base" ], - "relevancy_score": 43.4 + "relevancy_score": 43.0 }, { "architecture_id": "MiniMaxForCausalLM", "total_models": 2, - "total_downloads": 16499, + "total_downloads": 19980, "min_param_count": 231006264, "sample_models": [ "MiniMaxAI/MiniMax-Text-01-hf", "hyper-accel/tiny-random-minimax" ], - "relevancy_score": 43.0 + "relevancy_score": 42.9 }, { - "architecture_id": "EchoForCausalLM", - "total_models": 2, - "total_downloads": 14469, - "min_param_count": 114687488, - "sample_models": [ - "ethicalabs/Echo-DSRN-486M-v0.7.6-SFT", - "ethicalabs/Echo-DSRN-114M-Base" - ], - "relevancy_score": 42.7 - }, - { - "architecture_id": "AraGPT2LMHeadModel", + "architecture_id": "Exaone4ForCausalLM", "total_models": 3, - "total_downloads": 9685, - "min_param_count": 829369856, + "total_downloads": 69214, + "min_param_count": 1279391488, "sample_models": [ - "QCRI/Fanar-2-Diwan", - "aubmindlab/aragpt2-mega", - "aubmindlab/aragpt2-large" + "LGAI-EXAONE/EXAONE-4.0-1.2B", + "LGAI-EXAONE/EXAONE-4.0-32B", + "LGAI-EXAONE/EXAONE-4.0.1-32B" ], - "relevancy_score": 42.5 + "relevancy_score": 42.2 }, { - "architecture_id": "IlamaForCausalLM", - "total_models": 1, - "total_downloads": 105084, - "min_param_count": 1235814400, + "architecture_id": "DbrxForCausalLM", + "total_models": 2, + "total_downloads": 14052, + "min_param_count": 1612456, "sample_models": [ - "hmellor/Ilama-3.2-1B" + "trl-internal-testing/tiny-DbrxForCausalLM", + "katuni4ka/tiny-random-dbrx" ], - "relevancy_score": 42.5 + "relevancy_score": 42.1 }, { "architecture_id": "ModernBertForSequenceClassification", "total_models": 1, - "total_downloads": 17538, + "total_downloads": 18432, 
"min_param_count": 149609478, "sample_models": [ "opendatalab/meta-rater-professionalism-rating" ], - "relevancy_score": 42.5 + "relevancy_score": 42.1 }, { - "architecture_id": "LLaMAForCausalLM", - "total_models": 12, - "total_downloads": 21954, - "min_param_count": 6738425856, + "architecture_id": "Mistral3ForConditionalGeneration", + "total_models": 6, + "total_downloads": 163824, + "min_param_count": 4251743232, "sample_models": [ - "maicomputer/alpaca-13b", - "Enoch/llama-65b-hf", - "mncai/chatdoctor", - "AdaptLLM/law-LLM", - "Nitish-Garikoti/finance-LLM", - "boboto/LLaMA-65B-HF", - "AdaptLLM/finance-LLM", - "AdaptLLM/medicine-LLM", - "Rardilit/Panther_v1", - "James-WYang/BigTranslate" + "farbodtavakkoli/OTel-LLM-3B-IT", + "ArmGPT/ArmenianGPT-1.0-3B", + "JANGQ-AI/Mistral-Small-4-119B-A6B-JANG_2L", + "odytrice/kenichi-flash", + "JANGQ-AI/Mistral-Small-4-119B-A6B-JANG_6M", + "JANGQ-AI/Mistral-Small-4-119B-A6B-JANG_4M" ], - "relevancy_score": 42.4 + "relevancy_score": 42.0 }, { - "architecture_id": "RITAModelForCausalLM", + "architecture_id": "AraGPT2LMHeadModel", "total_models": 3, - "total_downloads": 9211, - "min_param_count": 85096320, + "total_downloads": 9765, + "min_param_count": 829369856, "sample_models": [ - "lightonai/RITA_s", - "lightonai/RITA_xl", - "lightonai/RITA_l" + "QCRI/Fanar-2-Diwan", + "aubmindlab/aragpt2-mega", + "aubmindlab/aragpt2-large" ], - "relevancy_score": 42.4 + "relevancy_score": 42.0 }, { "architecture_id": "StarVectorForCausalLM", "total_models": 2, - "total_downloads": 74194, + "total_downloads": 82922, "min_param_count": 1434095620, "sample_models": [ "starvector/starvector-1b-im2svg", "starvector/starvector-8b-im2svg" ], - "relevancy_score": 42.4 + "relevancy_score": 42.0 }, { - "architecture_id": "DbrxForCausalLM", - "total_models": 2, - "total_downloads": 12374, - "min_param_count": 1612456, - "sample_models": [ - "trl-internal-testing/tiny-DbrxForCausalLM", - "katuni4ka/tiny-random-dbrx" - ], - "relevancy_score": 42.4 - }, 
- { - "architecture_id": "Mistral3ForConditionalGeneration", - "total_models": 5, - "total_downloads": 163974, - "min_param_count": 4251743232, - "sample_models": [ - "farbodtavakkoli/OTel-LLM-3B-IT", - "ArmGPT/ArmenianGPT-1.0-3B", - "JANGQ-AI/Mistral-Small-4-119B-A6B-JANG_2L", - "odytrice/kenichi-flash", - "JANGQ-AI/Mistral-Small-4-119B-A6B-JANG_6M" - ], - "relevancy_score": 42.2 - }, - { - "architecture_id": "Plamo2ForCausalLM", - "total_models": 1, - "total_downloads": 81427, - "min_param_count": 1291441920, + "architecture_id": "RITAModelForCausalLM", + "total_models": 3, + "total_downloads": 9391, + "min_param_count": 85096320, "sample_models": [ - "pfnet/plamo-2-1b" + "lightonai/RITA_s", + "lightonai/RITA_xl", + "lightonai/RITA_l" ], "relevancy_score": 41.9 }, { "architecture_id": "OLMoForCausalLM", - "total_models": 6, - "total_downloads": 16483, + "total_models": 7, + "total_downloads": 17805, "min_param_count": 1176764416, "sample_models": [ "allenai/OLMo-7B-Instruct", "allenai/OLMo-7B", "allenai/OLMo-1B", "allenai/OLMo-7B-0424", + "Nhoodie/omni-dna-ici-dc", "allenai/OLMo-7B-Twin-2T", "allenai/OLMo-7B-SFT" ], + "relevancy_score": 41.8 + }, + { + "architecture_id": "IlamaForCausalLM", + "total_models": 1, + "total_downloads": 102422, + "min_param_count": 1235814400, + "sample_models": [ + "hmellor/Ilama-3.2-1B" + ], + "relevancy_score": 41.8 + }, + { + "architecture_id": "A2DQwen3LMHeadModel", + "total_models": 2, + "total_downloads": 11738, + "min_param_count": 751632384, + "sample_models": [ + "dllm-hub/Qwen3-0.6B-diffusion-mdlm-v0.1", + "dllm-hub/Qwen3-0.6B-diffusion-bd3lm-v0.1" + ], "relevancy_score": 41.7 }, { - "architecture_id": "MiniMaxM2ForCausalLM", - "total_models": 10, - "total_downloads": 970048, - "min_param_count": 18581099008, + "architecture_id": "NandiForCausalLM", + "total_models": 2, + "total_downloads": 10215, + "min_param_count": 153412928, "sample_models": [ - "MiniMaxAI/MiniMax-M2.5", - "cerebras/MiniMax-M2.1-REAP-139B-A10B", - 
"MiniMaxAI/MiniMax-M2", - "MiniMaxAI/MiniMax-M2.1", - "cerebras/MiniMax-M2.5-REAP-139B-A10B", - "aspctu/MiniMax-M2.5", - "dealignai/MiniMax-M2.5-UNCENSORED-JANG_2L", - "unsloth/MiniMax-M2.5", - "dealignai/MiniMax-M2.5-JANG_3L-CRACK", - "JANGQ-AI/MiniMax-M2.5-JANG_3L" + "Rta-AILabs/Nandi-Mini-150M", + "Rta-AILabs/Nandi-Mini-150M-Instruct" + ], + "relevancy_score": 41.4 + }, + { + "architecture_id": "LLaMAForCausalLM", + "total_models": 12, + "total_downloads": 21884, + "min_param_count": 6738425856, + "sample_models": [ + "maicomputer/alpaca-13b", + "Enoch/llama-65b-hf", + "mncai/chatdoctor", + "AdaptLLM/law-LLM", + "Nitish-Garikoti/finance-LLM", + "boboto/LLaMA-65B-HF", + "AdaptLLM/finance-LLM", + "AdaptLLM/medicine-LLM", + "Rardilit/Panther_v1", + "James-WYang/BigTranslate" + ], + "relevancy_score": 41.3 + }, + { + "architecture_id": "Plamo2ForCausalLM", + "total_models": 1, + "total_downloads": 81448, + "min_param_count": 1291441920, + "sample_models": [ + "pfnet/plamo-2-1b" ], - "relevancy_score": 41.5 + "relevancy_score": 41.3 }, { "architecture_id": "Starcoder2ForCausalLM", "total_models": 5, - "total_downloads": 117124, + "total_downloads": 116878, "min_param_count": 3030371328, "sample_models": [ "bigcode/starcoder2-3b", @@ -1239,48 +1190,36 @@ "bigcode/starcoder2-15b-instruct-v0.1", "dphn/dolphincoder-starcoder2-15b" ], - "relevancy_score": 41.4 - }, - { - "architecture_id": "GlmForCausalLM", - "total_models": 4, - "total_downloads": 23486, - "min_param_count": 1593427968, - "sample_models": [ - "zai-org/glm-4-9b-chat-hf", - "zai-org/glm-4-9b-hf", - "zai-org/glm-edge-4b-chat", - "zai-org/glm-edge-1.5b-chat" - ], - "relevancy_score": 41.2 + "relevancy_score": 40.6 }, { "architecture_id": "MolformerForCausalLM", "total_models": 2, - "total_downloads": 7302, + "total_downloads": 6850, "min_param_count": 46805760, "sample_models": [ "ibm-research/GP-MoLFormer-Uniq", "ralyn/NPComposer-v2" ], - "relevancy_score": 41.2 + "relevancy_score": 40.6 }, { - 
"architecture_id": "MptForCausalLM", - "total_models": 3, - "total_downloads": 4577, - "min_param_count": 405032, + "architecture_id": "GlmForCausalLM", + "total_models": 4, + "total_downloads": 23066, + "min_param_count": 1593427968, "sample_models": [ - "yujiepan/mpt-tiny-random", - "explosion-testing/mpt-test", - "team-lucid/mptk-1b" + "zai-org/glm-4-9b-chat-hf", + "zai-org/glm-4-9b-hf", + "zai-org/glm-edge-4b-chat", + "zai-org/glm-edge-1.5b-chat" ], - "relevancy_score": 40.8 + "relevancy_score": 40.5 }, { "architecture_id": "Glm4MoeLiteForCausalLM", - "total_models": 8, - "total_downloads": 1257096, + "total_models": 9, + "total_downloads": 1217856, "min_param_count": 22996118432, "sample_models": [ "zai-org/GLM-4.7-Flash", @@ -1290,79 +1229,85 @@ "Olafangensan/GLM-4.7-Flash-heretic", "Ex0bit/GLM-4.7-Flash-PRISM", "jerrycheng233/model5_sft_16bit", - "aaravriyer193/chimpgpt-coder-elite" + "aaravriyer193/chimpgpt-coder-elite", + "austindixson/glm-4.7-flash-Opus-Reasoning" ], - "relevancy_score": 40.7 + "relevancy_score": 40.3 }, { - "architecture_id": "LLaDAModelLM", - "total_models": 4, - "total_downloads": 682726, - "min_param_count": 8015581184, + "architecture_id": "MptForCausalLM", + "total_models": 3, + "total_downloads": 4595, + "min_param_count": 405032, "sample_models": [ - "GSAI-ML/LLaDA-8B-Instruct", - "GSAI-ML/LLaDA-8B-Base", - "GSAI-ML/LLaDA-1.5", - "d3LLM/d3LLM_LLaDA" + "yujiepan/mpt-tiny-random", + "explosion-testing/mpt-test", + "team-lucid/mptk-1b" ], - "relevancy_score": 40.7 + "relevancy_score": 40.3 }, { - "architecture_id": "NandiForCausalLM", - "total_models": 1, - "total_downloads": 7981, - "min_param_count": 153412928, + "architecture_id": "Llama4ForConditionalGeneration", + "total_models": 2, + "total_downloads": 5844, + "min_param_count": 6686880, "sample_models": [ - "Rta-AILabs/Nandi-Mini-150M" + "yujiepan/llama-4-tiny-random", + "Mogith/Llama-4-Scout-17B-16E-Instruct-Q8_0" ], - "relevancy_score": 40.7 + "relevancy_score": 40.2 }, { 
"architecture_id": "DuchifatCore", "total_models": 3, - "total_downloads": 4079, + "total_downloads": 4086, "min_param_count": 136763904, "sample_models": [ "Raziel1234/Duchifat-2", "razielAI/Duchifat-2.1-Instruct", "TopAI-1/Duchifat-2-Instruct" ], - "relevancy_score": 40.6 + "relevancy_score": 40.1 + }, + { + "architecture_id": "LLaDAModelLM", + "total_models": 4, + "total_downloads": 659922, + "min_param_count": 8015581184, + "sample_models": [ + "GSAI-ML/LLaDA-8B-Instruct", + "GSAI-ML/LLaDA-8B-Base", + "GSAI-ML/LLaDA-1.5", + "d3LLM/d3LLM_LLaDA" + ], + "relevancy_score": 39.8 }, { "architecture_id": "GLAForCausalLM", "total_models": 2, - "total_downloads": 5043, + "total_downloads": 4823, "min_param_count": 341707776, "sample_models": [ - "fla-hub/gla-340M-15B", - "fla-hub/gla-1.3B-100B" + "fla-hub/gla-1.3B-100B", + "fla-hub/gla-340M-15B" ], - "relevancy_score": 40.4 + "relevancy_score": 39.8 }, { - "architecture_id": "RWForCausalLM", - "total_models": 11, - "total_downloads": 11851, - "min_param_count": 6854619456, + "architecture_id": "BertLMHeadModel", + "total_models": 2, + "total_downloads": 4589, + "min_param_count": 184474880, "sample_models": [ - "projecte-aina/aguila-7b", - "lightonai/alfred-40b-1023", - "explosion-testing/refined-web-model-test", - "vilm/vulture-40b", - "h2oai/h2ogpt-gm-oasst1-en-2048-falcon-7b-v2", - "nomic-ai/gpt4all-falcon", - "OpenAssistant/falcon-40b-sft-top1-560", - "h2oai/h2ogpt-gm-oasst1-en-2048-falcon-7b-v3", - "QuixiAI/WizardLM-Uncensored-Falcon-40b", - "mrm8488/falcoder-7b" + "dicta-il/BEREL_3.0", + "hf-tiny-model-private/tiny-random-BertLMHeadModel" ], - "relevancy_score": 40.3 + "relevancy_score": 39.7 }, { "architecture_id": "Lfm2MoeForCausalLM", "total_models": 8, - "total_downloads": 175258, + "total_downloads": 187579, "min_param_count": 8339929856, "sample_models": [ "farbodtavakkoli/OTel-LLM-24B-IT", @@ -1371,818 +1316,905 @@ "LiquidAI/LFM2-8B-A1B-ONNX", "LiquidAI/LFM2-24B-A2B-ONNX", "unsloth/LFM2-8B-A1B", - 
"huihui-ai/Huihui-LFM2-24B-A2B-abliterated", - "MuXodious/LFM2-8B-A1B-absolute-heresy-MPOA" + "MuXodious/LFM2-8B-A1B-absolute-heresy-MPOA", + "huihui-ai/Huihui-LFM2-24B-A2B-abliterated" ], - "relevancy_score": 40.3 + "relevancy_score": 39.5 }, { - "architecture_id": "BertLMHeadModel", - "total_models": 2, - "total_downloads": 4599, - "min_param_count": 184474880, + "architecture_id": "RWForCausalLM", + "total_models": 11, + "total_downloads": 11570, + "min_param_count": 6854619456, "sample_models": [ - "dicta-il/BEREL_3.0", - "hf-tiny-model-private/tiny-random-BertLMHeadModel" + "projecte-aina/aguila-7b", + "lightonai/alfred-40b-1023", + "explosion-testing/refined-web-model-test", + "h2oai/h2ogpt-gm-oasst1-en-2048-falcon-7b-v2", + "vilm/vulture-40b", + "nomic-ai/gpt4all-falcon", + "OpenAssistant/falcon-40b-sft-top1-560", + "h2oai/h2ogpt-gm-oasst1-en-2048-falcon-7b-v3", + "QuixiAI/WizardLM-Uncensored-Falcon-40b", + "mrm8488/falcoder-7b" ], - "relevancy_score": 40.2 + "relevancy_score": 39.3 }, { - "architecture_id": "Llama4ForConditionalGeneration", - "total_models": 1, - "total_downloads": 6224, - "min_param_count": 6686880, + "architecture_id": "GPJTGPT2ModelForCausalLM", + "total_models": 4, + "total_downloads": 2088, + "min_param_count": 175592448, "sample_models": [ - "yujiepan/llama-4-tiny-random" + "gpjt/8xa100m40-baseline-3", + "gpjt/8xa100m40-baseline-2", + "gpjt/8xa100m40-baseline-8", + "gpjt/8xa100m40-baseline-7" ], - "relevancy_score": 40.2 + "relevancy_score": 39.2 }, { "architecture_id": "AfmoeForCausalLM", "total_models": 6, - "total_downloads": 45755, + "total_downloads": 44009, "min_param_count": 6120003328, "sample_models": [ - "arcee-ai/Trinity-Nano-Preview", "arcee-ai/Trinity-Large-Thinking", + "arcee-ai/Trinity-Nano-Preview", "arcee-ai/Trinity-Mini", "arcee-ai/Trinity-Nano-Base", "arcee-ai/Trinity-Mini-Base", "arcee-ai/Trinity-Large-Preview" ], - "relevancy_score": 40.0 + "relevancy_score": 39.1 + }, + { + "architecture_id": 
"GPTJXMoEForCausalLM", + "total_models": 1, + "total_downloads": 4510, + "min_param_count": 489915648, + "sample_models": [ + "Aletheia-ng/SabiYarn_MoE_translate" + ], + "relevancy_score": 39.0 + }, + { + "architecture_id": "GatedDeltaNetForCausalLM", + "total_models": 1, + "total_downloads": 4069, + "min_param_count": 317524480, + "sample_models": [ + "deqing/gdn-300M-v5-gdn" + ], + "relevancy_score": 38.8 }, { "architecture_id": "BitnetForCausalLM", "total_models": 2, - "total_downloads": 3594, + "total_downloads": 2937, "min_param_count": 728843904, "sample_models": [ "1bitLLM/bitnet_b1_58-large", "1bitLLM/bitnet_b1_58-3B" ], - "relevancy_score": 39.6 + "relevancy_score": 38.7 }, { "architecture_id": "RecurrentGemmaForCausalLM", "total_models": 3, - "total_downloads": 13166, + "total_downloads": 13093, "min_param_count": 2682862080, "sample_models": [ "google/recurrentgemma-2b", "google/recurrentgemma-2b-it", "google/recurrentgemma-9b" ], - "relevancy_score": 39.2 - }, - { - "architecture_id": "GatedDeltaNetForCausalLM", - "total_models": 1, - "total_downloads": 4063, - "min_param_count": 317524480, - "sample_models": [ - "deqing/gdn-300M-v5-gdn" - ], - "relevancy_score": 39.2 + "relevancy_score": 38.6 }, { "architecture_id": "RecursiveLanguageModel", "total_models": 1, - "total_downloads": 3560, + "total_downloads": 3401, "min_param_count": 198464806, "sample_models": [ "Girinath11/recursive-language-model-198m" ], - "relevancy_score": 38.9 + "relevancy_score": 38.4 }, { "architecture_id": "T5EncoderModel", "total_models": 1, - "total_downloads": 122326, + "total_downloads": 119882, "min_param_count": 4762310656, "sample_models": [ "XLabs-AI/xflux_text_encoders" ], - "relevancy_score": 38.8 - }, - { - "architecture_id": "AprielForCausalLM", - "total_models": 1, - "total_downloads": 113509, - "min_param_count": 4832071680, - "sample_models": [ - "ServiceNow-AI/Apriel-5B-Instruct" - ], - "relevancy_score": 38.7 - }, - { - "architecture_id": "GPTJXMoEForCausalLM", 
- "total_models": 1, - "total_downloads": 3210, - "min_param_count": 489915648, - "sample_models": [ - "Aletheia-ng/SabiYarn_MoE_translate" - ], - "relevancy_score": 38.7 + "relevancy_score": 38.2 }, { "architecture_id": "LLM", "total_models": 1, - "total_downloads": 3059, + "total_downloads": 3132, "min_param_count": 497145984, "sample_models": [ "rudyon/linnet-497M" ], - "relevancy_score": 38.6 - }, - { - "architecture_id": "DreamModel", - "total_models": 6, - "total_downloads": 140463, - "min_param_count": 7615616512, - "sample_models": [ - "Dream-org/Dream-v0-Instruct-7B", - "Dream-org/Dream-v0-Base-7B", - "Dream-org/Dream-Coder-v0-Instruct-7B", - "Zigeng/dParallel_Dream_7B_Instruct", - "Dream-org/Dream-Coder-v0-Base-7B", - "Dream-org/DreamOn-v0-7B" - ], - "relevancy_score": 38.5 + "relevancy_score": 38.2 }, { "architecture_id": "SwarmForCausalLM", "total_models": 1, - "total_downloads": 2979, + "total_downloads": 3059, "min_param_count": 52729731, "sample_models": [ "reaperdoesntknow/SAGI" ], - "relevancy_score": 38.5 + "relevancy_score": 38.2 }, { - "architecture_id": "GPJTGPT2ModelForCausalLM", - "total_models": 3, - "total_downloads": 1570, - "min_param_count": 175592448, + "architecture_id": "AprielForCausalLM", + "total_models": 1, + "total_downloads": 113197, + "min_param_count": 4832071680, "sample_models": [ - "gpjt/8xa100m40-baseline-3", - "gpjt/8xa100m40-baseline-2", - "gpjt/8xa100m40-baseline-7" + "ServiceNow-AI/Apriel-5B-Instruct" ], - "relevancy_score": 38.4 + "relevancy_score": 38.1 }, { "architecture_id": "SpatialLMQwenForCausalLM", "total_models": 1, - "total_downloads": 2768, + "total_downloads": 2930, "min_param_count": 603511168, "sample_models": [ "manycore-research/SpatialLM1.1-Qwen-0.5B" ], - "relevancy_score": 38.4 + "relevancy_score": 38.1 }, { "architecture_id": "MiniMindForCausalLM", "total_models": 2, - "total_downloads": 2005, + "total_downloads": 2151, "min_param_count": 38840960, "sample_models": [ "yiwenX/MiniMind-MoE-640-120M", 
"chujiamo/baiheng_0405" ], - "relevancy_score": 38.3 + "relevancy_score": 38.0 + }, + { + "architecture_id": "DreamModel", + "total_models": 6, + "total_downloads": 153501, + "min_param_count": 7615616512, + "sample_models": [ + "Dream-org/Dream-v0-Instruct-7B", + "Dream-org/Dream-v0-Base-7B", + "Dream-org/Dream-Coder-v0-Instruct-7B", + "Zigeng/dParallel_Dream_7B_Instruct", + "Dream-org/Dream-Coder-v0-Base-7B", + "Dream-org/DreamOn-v0-7B" + ], + "relevancy_score": 37.9 }, { "architecture_id": "AV2TextForConditionalGeneration", "total_models": 1, - "total_downloads": 2566, + "total_downloads": 2689, "min_param_count": 480465000, "sample_models": [ "nguyenvulebinh/AV-HuBERT-MuAViC-en" ], - "relevancy_score": 38.2 + "relevancy_score": 37.9 }, { "architecture_id": "BD3LM", "total_models": 2, - "total_downloads": 1793, + "total_downloads": 1953, "min_param_count": 169627250, "sample_models": [ "kuleshov-group/bd3lm-owt-block_size4", "kuleshov-group/bd3lm-owt-block_size16" ], - "relevancy_score": 38.1 - }, - { - "architecture_id": "HCXVisionV2ForCausalLM", - "total_models": 2, - "total_downloads": 356837, - "min_param_count": 10741664520, - "sample_models": [ - "naver-hyperclovax/HyperCLOVAX-SEED-Omni-8B", - "naver-hyperclovax/HyperCLOVAX-SEED-Think-32B" - ], - "relevancy_score": 37.9 + "relevancy_score": 37.8 }, { - "architecture_id": "PenguinVLQwen3ForCausalLM", + "architecture_id": "ForCausalLM", "total_models": 2, - "total_downloads": 10102, - "min_param_count": 2167941120, + "total_downloads": 1725, + "min_param_count": 748801603, "sample_models": [ - "tencent/Penguin-VL-8B", - "tencent/Penguin-VL-2B" + "kyr0/Gemma-4-Waldwicht-Sproessling", + "kyr0/Gemma-4-Waldwicht-Winzling" ], - "relevancy_score": 37.9 + "relevancy_score": 37.5 }, { "architecture_id": "BlenderbotForConditionalGeneration", "total_models": 1, - "total_downloads": 2226, + "total_downloads": 2289, "min_param_count": 364810568, "sample_models": [ "thu-coai/blenderbot-400M-esconv" ], - 
"relevancy_score": 37.9 + "relevancy_score": 37.5 }, { "architecture_id": "Autoencoder", "total_models": 1, - "total_downloads": 2223, + "total_downloads": 2278, "min_param_count": 75832064, "sample_models": [ "cccczshao/CALM-Autoencoder" ], - "relevancy_score": 37.9 + "relevancy_score": 37.5 }, { "architecture_id": "EveMoEForCausalLM", "total_models": 1, - "total_downloads": 2123, + "total_downloads": 2174, "min_param_count": 271970816, "sample_models": [ "anthonym21/Eve-2-MoE-IT-272M" ], - "relevancy_score": 37.8 + "relevancy_score": 37.4 }, { "architecture_id": "FusionInDecoderForConditionalGeneration", "total_models": 1, - "total_downloads": 2105, + "total_downloads": 2146, "min_param_count": 247577856, "sample_models": [ "Intel/fid_flan_t5_base_nq" ], - "relevancy_score": 37.8 + "relevancy_score": 37.4 }, { "architecture_id": "Plamo3ForCausalLM", "total_models": 1, - "total_downloads": 12290, + "total_downloads": 12925, "min_param_count": 2603344384, "sample_models": [ "pfnet/plamo-3-nict-2b-base" ], - "relevancy_score": 37.7 + "relevancy_score": 37.3 + }, + { + "architecture_id": "TransformerForCausalLM", + "total_models": 1, + "total_downloads": 12840, + "min_param_count": 1364297728, + "sample_models": [ + "fla-hub/transformer-1.3B-100B" + ], + "relevancy_score": 37.3 }, { "architecture_id": "LIMEForCausalLM", "total_models": 1, - "total_downloads": 2043, + "total_downloads": 2102, "min_param_count": 984405504, "sample_models": [ "anarlavrenov/lime-1b-instruct" ], - "relevancy_score": 37.7 + "relevancy_score": 37.3 }, { "architecture_id": "ModernBertForMaskedLM", "total_models": 1, - "total_downloads": 2007, + "total_downloads": 2068, "min_param_count": 590367063, "sample_models": [ "JorgeVanco/diffusionGPT" ], - "relevancy_score": 37.6 + "relevancy_score": 37.3 + }, + { + "architecture_id": "HCXVisionV2ForCausalLM", + "total_models": 2, + "total_downloads": 354662, + "min_param_count": 10741664520, + "sample_models": [ + 
"naver-hyperclovax/HyperCLOVAX-SEED-Omni-8B", + "naver-hyperclovax/HyperCLOVAX-SEED-Think-32B" + ], + "relevancy_score": 37.2 }, { "architecture_id": "MoEGPTForCausalLM", "total_models": 1, - "total_downloads": 1960, + "total_downloads": 1943, "min_param_count": 149603328, "sample_models": [ "arnomatic/german-moe-gpt-v8-pretrained" ], - "relevancy_score": 37.6 + "relevancy_score": 37.2 + }, + { + "architecture_id": "ParamBharatGenForCausalLM", + "total_models": 4, + "total_downloads": 5035, + "min_param_count": 2860664832, + "sample_models": [ + "bharatgenai/Param-1-5B", + "bharatgenai/AyurParam", + "bharatgenai/Param-1-2.9B-Instruct", + "bharatgenai/AgriParam" + ], + "relevancy_score": 37.1 }, { - "architecture_id": "TransformerForCausalLM", + "architecture_id": "LanceAI", "total_models": 1, - "total_downloads": 11223, - "min_param_count": 1364297728, + "total_downloads": 1887, + "min_param_count": 137022720, + "sample_models": [ + "NeuraCraft/Lance-AI-V2" + ], + "relevancy_score": 37.1 + }, + { + "architecture_id": "RWKV7ForCausalLM", + "total_models": 2, + "total_downloads": 1332, + "min_param_count": 381332480, "sample_models": [ - "fla-hub/transformer-1.3B-100B" + "puigde/rwkv7-380M-15B-slimpajama", + "fla-hub/rwkv7-1.5B-world" ], - "relevancy_score": 37.5 + "relevancy_score": 37.0 }, { "architecture_id": "LSTMForCausalLM", "total_models": 1, - "total_downloads": 1768, + "total_downloads": 1772, "min_param_count": 164921344, "sample_models": [ "deqing/lstm-window-4-v5" ], - "relevancy_score": 37.4 + "relevancy_score": 37.0 }, { "architecture_id": "NanoChatForCausalLM", "total_models": 3, - "total_downloads": 5777, + "total_downloads": 6167, "min_param_count": 2217082880, "sample_models": [ "Twobombs/nanochat-d34-sft-hf", "pankajmathur/nanochat-d34-sft-hf", "Nekochu/nanochat-d24" ], - "relevancy_score": 37.3 + "relevancy_score": 36.9 }, { - "architecture_id": "Qwen3ASRForConditionalGeneration", + "architecture_id": "PenguinVLQwen3ForCausalLM", "total_models": 
2, - "total_downloads": 1203, - "min_param_count": 782426112, + "total_downloads": 7957, + "min_param_count": 2167941120, "sample_models": [ - "bezzam/Qwen3-ASR-0.6B", - "hypaai/Qwen3-ASR-0.6B_2026-03-22_04-35-10" + "tencent/Penguin-VL-8B", + "tencent/Penguin-VL-2B" ], - "relevancy_score": 37.2 + "relevancy_score": 36.9 }, { "architecture_id": "Moondream", "total_models": 1, - "total_downloads": 9755, + "total_downloads": 10509, "min_param_count": 1857482608, "sample_models": [ "vikhyatk/moondream1" ], - "relevancy_score": 37.2 + "relevancy_score": 36.9 }, { - "architecture_id": "CircuitGPTForCausalLM", - "total_models": 1, - "total_downloads": 1414, - "min_param_count": 419124736, + "architecture_id": "GPT3DevLMHeadModel", + "total_models": 2, + "total_downloads": 1135, + "min_param_count": 125226240, "sample_models": [ - "openai/circuit-sparsity" + "k050506koch/GPT3-dev-350m-2805", + "k050506koch/GPT3-dev-125m-0104" ], - "relevancy_score": 36.9 + "relevancy_score": 36.6 }, { - "architecture_id": "ParamBharatGenForCausalLM", - "total_models": 3, - "total_downloads": 4535, - "min_param_count": 2860673024, + "architecture_id": "D3LMForMaskedLM", + "total_models": 1, + "total_downloads": 1260, + "min_param_count": 55905164, "sample_models": [ - "bharatgenai/Param-1-5B", - "bharatgenai/AyurParam", - "bharatgenai/Param-1-2.9B-Instruct" + "Hengchang-Liu/D3LM-from-nt" ], - "relevancy_score": 36.8 + "relevancy_score": 36.2 }, { - "architecture_id": "SliderGPT", + "architecture_id": "CircuitGPTForCausalLM", "total_models": 1, - "total_downloads": 1377, - "min_param_count": 47420936, + "total_downloads": 1247, + "min_param_count": 419124736, "sample_models": [ - "c-bone/CrystaLLM-pi_Mattergen-XRD" + "openai/circuit-sparsity" ], - "relevancy_score": 36.8 + "relevancy_score": 36.2 }, { - "architecture_id": "YoutuForCausalLM", - "total_models": 2, - "total_downloads": 4389, - "min_param_count": 1961560064, + "architecture_id": "PanguEmbeddedForCausalLM", + "total_models": 1, + 
"total_downloads": 7396, + "min_param_count": 1391497728, "sample_models": [ - "tencent/Youtu-LLM-2B-Base", - "tencent/Youtu-LLM-2B" + "FreedomIntelligence/openPangu-Embedded-1B" ], "relevancy_score": 36.1 }, { - "architecture_id": "DUO", + "architecture_id": "RubiRLM", "total_models": 1, - "total_downloads": 995, - "min_param_count": 169627250, + "total_downloads": 1142, + "min_param_count": 988446027, "sample_models": [ - "s-sahoo/duo-distilled" + "DevHunterAI/RubiRLM-1B-Base" ], - "relevancy_score": 36.1 + "relevancy_score": 36.0 }, { "architecture_id": "Rwkv7ForCausalLM", "total_models": 1, - "total_downloads": 970, + "total_downloads": 1092, "min_param_count": 34158592, "sample_models": [ "admijgjtjtjtjjg/dfdfdf" ], - "relevancy_score": 36.0 + "relevancy_score": 35.9 }, { - "architecture_id": "RubiRLM", + "architecture_id": "SliderGPT", "total_models": 1, - "total_downloads": 964, - "min_param_count": 988446027, + "total_downloads": 1069, + "min_param_count": 47420936, "sample_models": [ - "DevHunterAI/RubiRLM-1B-Base" + "c-bone/CrystaLLM-pi_Mattergen-XRD" ], - "relevancy_score": 36.0 + "relevancy_score": 35.9 }, { - "architecture_id": "RavenForCausalLM", + "architecture_id": "YoutuForCausalLM", "total_models": 2, - "total_downloads": 4033, - "min_param_count": 1385228288, + "total_downloads": 4259, + "min_param_count": 1961560064, "sample_models": [ - "tomg-group-umd/huginn-0125", - "smcleish/Recurrent-Llama-3.2-train-recurrence-32" + "tencent/Youtu-LLM-2B-Base", + "tencent/Youtu-LLM-2B" ], - "relevancy_score": 35.9 + "relevancy_score": 35.5 }, { "architecture_id": "GTLMForCausalLM", "total_models": 2, - "total_downloads": 4011, + "total_downloads": 4210, "min_param_count": 2095989760, "sample_models": [ "Madras1/GTLM-1-2B-A350M", "Madras1/GTLM-1-2B-A350M-fp16" ], - "relevancy_score": 35.9 - }, - { - "architecture_id": "PanguEmbeddedForCausalLM", - "total_models": 1, - "total_downloads": 5621, - "min_param_count": 1391497728, - "sample_models": [ - 
"FreedomIntelligence/openPangu-Embedded-1B" - ], - "relevancy_score": 35.9 + "relevancy_score": 35.5 }, { "architecture_id": "SoraForSLM", "total_models": 1, - "total_downloads": 901, + "total_downloads": 915, "min_param_count": 450707456, "sample_models": [ "Conlanger-LLM-CLEM/Sorie" ], - "relevancy_score": 35.9 - }, - { - "architecture_id": "MoshiForConditionalGeneration", - "total_models": 2, - "total_downloads": 133468, - "min_param_count": 7783880545, - "sample_models": [ - "kmhf/hf-moshiko", - "kmhf/hf-moshika" - ], - "relevancy_score": 35.7 - }, - { - "architecture_id": "MiMoForCausalLM", - "total_models": 2, - "total_downloads": 135798, - "min_param_count": 7833409536, - "sample_models": [ - "XiaomiMiMo/MiMo-7B-Base", - "XiaomiMiMo/MiMo-7B-RL" - ], - "relevancy_score": 35.7 + "relevancy_score": 35.5 }, { "architecture_id": "HGRNBitForCausalLM", "total_models": 1, - "total_downloads": 854, + "total_downloads": 874, "min_param_count": 374108160, "sample_models": [ "ridger/MMfreeLM-370M" ], - "relevancy_score": 35.7 + "relevancy_score": 35.4 }, { "architecture_id": "DotLMForCausalLM", "total_models": 1, - "total_downloads": 823, + "total_downloads": 825, "min_param_count": 176204544, "sample_models": [ "tensorfiend/DotLM-165M" ], - "relevancy_score": 35.7 + "relevancy_score": 35.3 }, { - "architecture_id": "D3LMForMaskedLM", - "total_models": 1, - "total_downloads": 790, - "min_param_count": 55905164, + "architecture_id": "LLaDA2MoeModelLM", + "total_models": 6, + "total_downloads": 289792, + "min_param_count": 16255643392, "sample_models": [ - "Hengchang-Liu/D3LM-from-nt" + "inclusionAI/LLaDA2.1-flash", + "inclusionAI/LLaDA2.0-mini", + "inclusionAI/LLaDA2.1-mini", + "inclusionAI/LLaDA2.0-mini-CAP", + "inclusionAI/LLaDA2.0-flash", + "Zigeng/DMax-Coder-16B" ], - "relevancy_score": 35.6 + "relevancy_score": 35.2 }, { - "architecture_id": "DeltaNetForCausalLM", + "architecture_id": "RavenForCausalLM", + "total_models": 2, + "total_downloads": 3635, + 
"min_param_count": 1385228288, + "sample_models": [ + "tomg-group-umd/huginn-0125", + "smcleish/Recurrent-Llama-3.2-train-recurrence-32" + ], + "relevancy_score": 35.2 + }, + { + "architecture_id": "DUO", "total_models": 1, - "total_downloads": 4588, - "min_param_count": 1365677056, + "total_downloads": 789, + "min_param_count": 169627250, "sample_models": [ - "fla-hub/delta_net-1.3B-100B" + "s-sahoo/duo-distilled" ], - "relevancy_score": 35.5 + "relevancy_score": 35.2 }, { - "architecture_id": "VaultGemmaForCausalLM", + "architecture_id": "DeltaNetForCausalLM", "total_models": 1, - "total_downloads": 4209, - "min_param_count": 1038741120, + "total_downloads": 4682, + "min_param_count": 1365677056, "sample_models": [ - "google/vaultgemma-1b" + "fla-hub/delta_net-1.3B-100B" ], - "relevancy_score": 35.3 + "relevancy_score": 35.1 }, { - "architecture_id": "Rwkv5ForCausalLM", + "architecture_id": "MoshiForConditionalGeneration", "total_models": 2, - "total_downloads": 2960, - "min_param_count": 1577754624, + "total_downloads": 128966, + "min_param_count": 7783880545, "sample_models": [ - "RWKV/rwkv-5-world-3b", - "RWKV/rwkv-5-world-1b5" + "kmhf/hf-moshiko", + "kmhf/hf-moshika" ], - "relevancy_score": 35.2 + "relevancy_score": 35.0 }, { - "architecture_id": "LLaDA2MoeModelLM", - "total_models": 5, - "total_downloads": 245306, - "min_param_count": 16255643392, + "architecture_id": "MiMoForCausalLM", + "total_models": 2, + "total_downloads": 132837, + "min_param_count": 7833409536, "sample_models": [ - "inclusionAI/LLaDA2.1-flash", - "inclusionAI/LLaDA2.0-mini", - "inclusionAI/LLaDA2.1-mini", - "inclusionAI/LLaDA2.0-mini-CAP", - "inclusionAI/LLaDA2.0-flash" + "XiaomiMiMo/MiMo-7B-Base", + "XiaomiMiMo/MiMo-7B-RL" ], - "relevancy_score": 35.1 + "relevancy_score": 35.0 }, { - "architecture_id": "RWKV7ForCausalLM", - "total_models": 3, - "total_downloads": 2153, - "min_param_count": 1527404544, + "architecture_id": "Rwkv5ForCausalLM", + "total_models": 2, + "total_downloads": 
3092, + "min_param_count": 1577754624, "sample_models": [ - "RWKV/RWKV7-Goose-World3-1.5B-HF", - "fla-hub/rwkv7-1.5B-world", - "RWKV/RWKV7-Goose-World3-2.9B-HF" + "RWKV/rwkv-5-world-3b", + "RWKV/rwkv-5-world-1b5" ], - "relevancy_score": 35.1 + "relevancy_score": 34.8 }, { - "architecture_id": "MegaForCausalLM", + "architecture_id": "VaultGemmaForCausalLM", "total_models": 1, - "total_downloads": 613, - "min_param_count": 126132108, + "total_downloads": 4072, + "min_param_count": 1038741120, "sample_models": [ - "BEE-spoke-data/mega-ar-126m-4k" + "google/vaultgemma-1b" ], - "relevancy_score": 35.0 + "relevancy_score": 34.8 }, { "architecture_id": "WordLatentTransformerForCausalLM", "total_models": 1, - "total_downloads": 583, + "total_downloads": 654, "min_param_count": 6861056, "sample_models": [ "sign/WeLT-string-repetition" ], - "relevancy_score": 34.9 + "relevancy_score": 34.8 + }, + { + "architecture_id": "LilleForCausalLM", + "total_models": 1, + "total_downloads": 604, + "min_param_count": 127236768, + "sample_models": [ + "Nikity/lille-130m-instruct" + ], + "relevancy_score": 34.6 }, { "architecture_id": "KimiK2ForCausalLM", "total_models": 1, - "total_downloads": 583, + "total_downloads": 586, "min_param_count": 170595012, "sample_models": [ "hyper-accel/tiny-random-kimi-k2" ], - "relevancy_score": 34.9 + "relevancy_score": 34.6 }, { - "architecture_id": "LilleForCausalLM", - "total_models": 1, - "total_downloads": 565, - "min_param_count": 127236768, + "architecture_id": "RuGPT3XLForCausalLM", + "total_models": 2, + "total_downloads": 2650, + "min_param_count": 1431261184, "sample_models": [ - "Nikity/lille-130m-instruct" + "evilfreelancer/ruGPT3XL", + "evilfreelancer/ruGPT3XL-8k" ], - "relevancy_score": 34.8 + "relevancy_score": 34.5 }, { "architecture_id": "GPT2CompetitiveMoE", "total_models": 1, - "total_downloads": 527, + "total_downloads": 528, "min_param_count": 497796864, "sample_models": [ "Fu01978/gpt2-4x124M-competitive-moe" ], - 
"relevancy_score": 34.7 + "relevancy_score": 34.3 + }, + { + "architecture_id": "MegaForCausalLM", + "total_models": 1, + "total_downloads": 522, + "min_param_count": 126132108, + "sample_models": [ + "BEE-spoke-data/mega-ar-126m-4k" + ], + "relevancy_score": 34.3 + }, + { + "architecture_id": "Qwen3ASRForConditionalGeneration", + "total_models": 1, + "total_downloads": 516, + "min_param_count": 782426112, + "sample_models": [ + "hypaai/Qwen3-ASR-0.6B_2026-03-22_04-35-10" + ], + "relevancy_score": 34.3 }, { "architecture_id": "BolmoForCausalLM", "total_models": 2, - "total_downloads": 2032, + "total_downloads": 2003, "min_param_count": 1468911776, "sample_models": [ "allenai/Bolmo-1B", "allenai/Bolmo-7B" ], - "relevancy_score": 34.3 + "relevancy_score": 33.9 }, { - "architecture_id": "XCurOSForCausalLM", + "architecture_id": "MoELLaVAQwen2ForCausalLM", "total_models": 1, - "total_downloads": 93590, - "min_param_count": 7615616512, + "total_downloads": 2702, + "min_param_count": 1406119552, "sample_models": [ - "XCurOS/XCurOS-0.1-8B-Instruct" + "KKHYA/llavaqwen2.5-0.5b-finetune-moe-4e-2k_20260331_194516" ], - "relevancy_score": 34.2 + "relevancy_score": 33.9 }, { - "architecture_id": "MoELLaVAQwen2ForCausalLM", + "architecture_id": "GiddForDiffusionLM", + "total_models": 2, + "total_downloads": 1803, + "min_param_count": 2844349440, + "sample_models": [ + "dvruette/gidd-unif-3b", + "dvruette/gidd-mask-3b" + ], + "relevancy_score": 33.6 + }, + { + "architecture_id": "TarsierForConditionalGeneration", "total_models": 1, - "total_downloads": 2618, - "min_param_count": 1406119552, + "total_downloads": 86089, + "min_param_count": 7063427072, "sample_models": [ - "KKHYA/llavaqwen2.5-0.5b-finetune-moe-4e-2k_20260331_194516" + "omni-research/Tarsier-7b" ], - "relevancy_score": 34.2 + "relevancy_score": 33.5 }, { "architecture_id": "OlmoHybridForCausalLM", "total_models": 4, - "total_downloads": 35834, + "total_downloads": 35503, "min_param_count": 7430870688, 
"sample_models": [ - "allenai/Olmo-Hybrid-7B", - "allenai/Olmo-Hybrid-Instruct-DPO-7B", - "allenai/Olmo-Hybrid-Instruct-SFT-7B", - "allenai/Olmo-Hybrid-Think-SFT-7B" + "allenai/Olmo-Hybrid-7B", + "allenai/Olmo-Hybrid-Instruct-DPO-7B", + "allenai/Olmo-Hybrid-Instruct-SFT-7B", + "allenai/Olmo-Hybrid-Think-SFT-7B" + ], + "relevancy_score": 33.4 + }, + { + "architecture_id": "ArgonneModel", + "total_models": 2, + "total_downloads": 1627, + "min_param_count": 1273807360, + "sample_models": [ + "PursuitOfDataScience/Argonne2.5-instruct", + "PursuitOfDataScience/Argonne2.5-base" ], - "relevancy_score": 34.1 + "relevancy_score": 33.4 }, { - "architecture_id": "TarsierForConditionalGeneration", + "architecture_id": "MobileLLMP1ForCausalLM", "total_models": 1, - "total_downloads": 87008, - "min_param_count": 7063427072, + "total_downloads": 2086, + "min_param_count": 1084453120, "sample_models": [ - "omni-research/Tarsier-7b" + "facebook/MobileLLM-Pro-base" ], - "relevancy_score": 34.1 + "relevancy_score": 33.3 }, { "architecture_id": "HybridQwen3ForCausalLM", "total_models": 9, - "total_downloads": 7400, + "total_downloads": 7439, "min_param_count": 8495712960, "sample_models": [ "amazon/GKA-primed-HQwen3-8B-Instruct", "amazon/Mamba2-primed-HQwen3-8B-Instruct", - "amazon/GDN-primed-HQwen3-8B-Instruct", "amazon/GDN-primed-HQwen3-32B-Instruct", + "amazon/GDN-primed-HQwen3-8B-Instruct", "amazon/GKA-primed-HQwen3-32B-Instruct", "amazon/BMOJOF-primed-HQwen3-8B-Instruct", "amazon/GKA-primed-HQwen3-8B-Reasoner", "amazon/GDN-primed-HQwen3-8B-Reasoner", "amazon/GKA-primed-HQwen3-32B-Reasoner" ], - "relevancy_score": 33.9 + "relevancy_score": 33.1 + }, + { + "architecture_id": "JetNemotronForCausalLM", + "total_models": 2, + "total_downloads": 8242, + "min_param_count": 3960424768, + "sample_models": [ + "jet-ai/Jet-Nemotron-2B", + "jet-ai/Jet-Nemotron-4B" + ], + "relevancy_score": 33.0 }, { "architecture_id": "Rwkv6ForCausalLM", "total_models": 8, - "total_downloads": 8437, + 
"total_downloads": 8905, "min_param_count": 7635746816, "sample_models": [ - "RWKV/v6-Finch-1B6-HF", "RWKV/v6-Finch-7B-HF", + "RWKV/v6-Finch-1B6-HF", + "RWKV/rwkv-6-world-3b", "RWKV/rwkv-6-world-1b6", "RWKV/rwkv-6-world-7b", "RWKV/v6-Finch-14B-HF", "RWKV/v6-Finch-3B-HF", - "RWKV/rwkv-6-world-3b-v2.1", - "RWKV/rwkv-6-world-3b" + "RWKV/rwkv-6-world-3b-v2.1" ], - "relevancy_score": 33.6 + "relevancy_score": 32.9 + }, + { + "architecture_id": "XCurOSForCausalLM", + "total_models": 1, + "total_downloads": 66986, + "min_param_count": 7615616512, + "sample_models": [ + "XCurOS/XCurOS-0.1-8B-Instruct" + ], + "relevancy_score": 32.9 + }, + { + "architecture_id": "SongGenMixedForConditionalGeneration", + "total_models": 1, + "total_downloads": 1723, + "min_param_count": 1363657956, + "sample_models": [ + "LiuZH-19/SongGen_mixed_pro" + ], + "relevancy_score": 32.9 }, { "architecture_id": "JAISLMHeadModel", "total_models": 6, - "total_downloads": 15551, + "total_downloads": 15081, "min_param_count": 7142689824, "sample_models": [ "inceptionai/jais-13b-chat", "katuni4ka/tiny-random-jais", "inceptionai/jais-family-30b-8k", - "inceptionai/jais-13b", "inceptionai/jais-family-13b-chat", + "inceptionai/jais-13b", "inceptionai/jais-family-6p7b-chat" ], - "relevancy_score": 33.6 + "relevancy_score": 32.8 }, { - "architecture_id": "SongGenMixedForConditionalGeneration", + "architecture_id": "OmniASRForConditionalGeneration", "total_models": 1, - "total_downloads": 1843, - "min_param_count": 1363657956, - "sample_models": [ - "LiuZH-19/SongGen_mixed_pro" - ], - "relevancy_score": 33.5 - }, - { - "architecture_id": "ArgonneModel", - "total_models": 2, - "total_downloads": 1321, - "min_param_count": 1273807360, + "total_downloads": 1633, + "min_param_count": 1631506944, "sample_models": [ - "PursuitOfDataScience/Argonne2.5-base", - "PursuitOfDataScience/Argonne2.5-instruct" + "bezzam/omniasr-llm-300m-v2" ], - "relevancy_score": 33.4 + "relevancy_score": 32.8 }, { - "architecture_id": 
"JetNemotronForCausalLM", + "architecture_id": "OpensciForCausalLM", "total_models": 2, - "total_downloads": 7758, - "min_param_count": 3960424768, + "total_downloads": 1031, + "min_param_count": 1714377728, "sample_models": [ - "jet-ai/Jet-Nemotron-2B", - "jet-ai/Jet-Nemotron-4B" + "ali-elganzory/1.7b-Comma0.1-300BT-longsft_16k-DPO-Tulu3-decontaminated", + "ali-elganzory/open-sci-ref-v0.02-1.7b-fineweb-edu-1.4t-300B-4096-longsft_16k-DPO-Tulu3-decontaminated" ], - "relevancy_score": 33.3 + "relevancy_score": 32.4 }, { - "architecture_id": "MobileLLMP1ForCausalLM", + "architecture_id": "Kanana2VecModel", "total_models": 1, - "total_downloads": 1750, - "min_param_count": 1084453120, + "total_downloads": 1330, + "min_param_count": 2086979328, "sample_models": [ - "facebook/MobileLLM-Pro-base" + "kakaocorp/kanana-nano-2.1b-embedding" ], - "relevancy_score": 33.3 + "relevancy_score": 32.3 }, { - "architecture_id": "OmniASRForConditionalGeneration", + "architecture_id": "DeciCoderForCausalLM", "total_models": 1, - "total_downloads": 1628, - "min_param_count": 1631506944, + "total_downloads": 1212, + "min_param_count": 1113671680, "sample_models": [ - "bezzam/omniasr-llm-300m-v2" + "Deci/DeciCoder-1b" ], - "relevancy_score": 33.2 + "relevancy_score": 32.1 }, { - "architecture_id": "Kanana2VecModel", + "architecture_id": "StableLMAlphaForCausalLM", "total_models": 1, - "total_downloads": 1350, - "min_param_count": 2086979328, + "total_downloads": 7022, + "min_param_count": 6889414656, "sample_models": [ - "kakaocorp/kanana-nano-2.1b-embedding" + "stabilityai/stablelm-base-alpha-7b-v2" ], - "relevancy_score": 32.8 + "relevancy_score": 32.0 }, { - "architecture_id": "GiddForDiffusionLM", - "total_models": 1, - "total_downloads": 1287, - "min_param_count": 2957629440, + "architecture_id": "IQuestCoderForCausalLM", + "total_models": 4, + "total_downloads": 17001, + "min_param_count": 7612810240, "sample_models": [ - "dvruette/gidd-unif-3b" + 
"IQuestLab/IQuest-Coder-V1-40B-Instruct", + "IQuestLab/IQuest-Coder-V1-7B-Instruct", + "Multilingual-Multimodal-NLP/IndustrialCoder", + "IQuestLab/IQuest-Coder-V1-40B-Thinking" ], - "relevancy_score": 32.7 + "relevancy_score": 31.8 }, { "architecture_id": "XLNetLMHeadModel", "total_models": 5, - "total_downloads": 479033, + "total_downloads": 433085, "min_param_count": null, "sample_models": [ "xlnet/xlnet-base-cased", @@ -2191,86 +2223,89 @@ "sshleifer/tiny-xlnet-base-cased", "textattack/xlnet-base-cased-imdb" ], - "relevancy_score": 32.6 + "relevancy_score": 31.5 }, { "architecture_id": "AeroForConditionalGeneration", "total_models": 1, - "total_downloads": 1269, + "total_downloads": 902, "min_param_count": 2416221184, "sample_models": [ "lmms-lab/Aero-1-Audio" ], - "relevancy_score": 32.6 - }, - { - "architecture_id": "IQuestCoderForCausalLM", - "total_models": 4, - "total_downloads": 17567, - "min_param_count": 7612810240, - "sample_models": [ - "IQuestLab/IQuest-Coder-V1-40B-Instruct", - "IQuestLab/IQuest-Coder-V1-7B-Instruct", - "Multilingual-Multimodal-NLP/IndustrialCoder", - "IQuestLab/IQuest-Coder-V1-40B-Thinking" - ], - "relevancy_score": 32.5 + "relevancy_score": 31.5 }, { - "architecture_id": "StableLMAlphaForCausalLM", + "architecture_id": "Qwen3VLForConditionalGeneration", "total_models": 1, - "total_downloads": 7233, - "min_param_count": 6889414656, + "total_downloads": 863, + "min_param_count": 2127532032, "sample_models": [ - "stabilityai/stablelm-base-alpha-7b-v2" + "Oysiyl/qwen3-vl-2b-unslop-good-lora-v1" ], - "relevancy_score": 32.5 + "relevancy_score": 31.4 }, { - "architecture_id": "DeciCoderForCausalLM", - "total_models": 1, - "total_downloads": 1190, - "min_param_count": 1113671680, + "architecture_id": "Qwen3MoeForCausalLM", + "total_models": 7, + "total_downloads": 5780, + "min_param_count": 8001454080, "sample_models": [ - "Deci/DeciCoder-1b" + "AIDC-AI/Marco-Nano-Instruct", + "zianglih/Qwen3-30B-A3B-Instruct-2507-MXFP8-last-8-BF16", + 
"AIDC-AI/Marco-Mini-Instruct", + "Dynamical-Systems/Dynamical-30B-A3B", + "bineric/lynx-instruct-30b", + "OpenMOSS-Team/SciJudge-30B", + "unsloth/Qwen3-30B-A3B-Thinking-2507" ], - "relevancy_score": 32.5 + "relevancy_score": 31.3 }, { "architecture_id": "GritLM", "total_models": 1, - "total_downloads": 30472, + "total_downloads": 31461, "min_param_count": 7241732096, "sample_models": [ "parasail-ai/GritLM-7B-vllm" ], - "relevancy_score": 31.7 + "relevancy_score": 31.3 }, { "architecture_id": "AXK1ForCausalLM", "total_models": 2, - "total_downloads": 19527, + "total_downloads": 19319, "min_param_count": 11448603648, "sample_models": [ "skt/A.X-K1", "thkim93/axk1-2layers" ], - "relevancy_score": 31.4 + "relevancy_score": 30.8 }, { - "architecture_id": "Lfm2Prototype1ForCausalLM", + "architecture_id": "VeridianForCausalLM", "total_models": 1, - "total_downloads": 735, - "min_param_count": 1212304128, + "total_downloads": 662, + "min_param_count": 1659913728, "sample_models": [ - "nntsuzu/LFM2-SFT-Prototype01-1.2B-JP" + "MagistrTheOne/veridian-beta" ], - "relevancy_score": 31.4 + "relevancy_score": 30.8 + }, + { + "architecture_id": "HymbaForCausalLM", + "total_models": 1, + "total_downloads": 645, + "min_param_count": 1522797824, + "sample_models": [ + "nvidia/Hymba-1.5B-Instruct" + ], + "relevancy_score": 30.8 }, { "architecture_id": "IdeficsForVisionText2Text", "total_models": 4, - "total_downloads": 10302, + "total_downloads": 10405, "min_param_count": 8929682192, "sample_models": [ "HuggingFaceM4/idefics-80b-instruct", @@ -2278,140 +2313,140 @@ "HuggingFaceM4/idefics-9b-instruct", "HuggingFaceM4/idefics-80b" ], - "relevancy_score": 31.3 - }, - { - "architecture_id": "InternVLChatModel", - "total_models": 1, - "total_downloads": 4299, - "min_param_count": 3712637952, - "sample_models": [ - "numind/NuExtract-2-4B-experimental" - ], - "relevancy_score": 31.3 + "relevancy_score": 30.7 }, { - "architecture_id": "CambrianQwenForCausalLM", + "architecture_id": 
"Lfm2Prototype1ForCausalLM", "total_models": 1, - "total_downloads": 4196, - "min_param_count": 3986951616, + "total_downloads": 634, + "min_param_count": 1212304128, "sample_models": [ - "nyu-visionx/Scale-RAE-Qwen1.5B_DiT2.4B" + "nntsuzu/LFM2-SFT-Prototype01-1.2B-JP" ], - "relevancy_score": 31.3 + "relevancy_score": 30.7 }, { - "architecture_id": "CheXagentForCausalLM", - "total_models": 1, - "total_downloads": 4149, - "min_param_count": 3140746752, + "architecture_id": "ChatGLMModel", + "total_models": 2, + "total_downloads": 17564, + "min_param_count": 9399951392, "sample_models": [ - "StanfordAIMI/CheXagent-2-3b" + "zai-org/codegeex4-all-9b", + "zai-org/glm-4-9b" ], - "relevancy_score": 31.3 + "relevancy_score": 30.6 }, { "architecture_id": "PersimmonForCausalLM", "total_models": 3, - "total_downloads": 12967, + "total_downloads": 12300, "min_param_count": 8823735296, "sample_models": [ "adept/persimmon-8b-chat", "adept/persimmon-8b-base", "pszemraj/perSLIMmon-8b-base" ], - "relevancy_score": 31.2 + "relevancy_score": 30.5 }, { - "architecture_id": "Phi3SmallForCausalLM", - "total_models": 2, - "total_downloads": 17559, - "min_param_count": 7392272384, + "architecture_id": "FlexOlmoForCausalLM", + "total_models": 3, + "total_downloads": 12280, + "min_param_count": 11627401216, "sample_models": [ - "microsoft/Phi-3-small-8k-instruct", - "microsoft/Phi-3-small-128k-instruct" + "allenai/Flex-reddit-2x7B-1T", + "allenai/FlexOlmo-7x7B-1T-RT", + "shanearora/Flex-reddit-2x7B-1T" ], - "relevancy_score": 31.2 + "relevancy_score": 30.5 }, { - "architecture_id": "HymbaForCausalLM", + "architecture_id": "TinyChartPhiForCausalLM", "total_models": 1, - "total_downloads": 667, - "min_param_count": 1522797824, + "total_downloads": 3551, + "min_param_count": 3189407648, "sample_models": [ - "nvidia/Hymba-1.5B-Instruct" + "mPLUG/TinyChart-3B-768" ], - "relevancy_score": 31.2 + "relevancy_score": 30.5 }, { - "architecture_id": "FlexOlmoForCausalLM", - "total_models": 3, - 
"total_downloads": 12568, - "min_param_count": 11627401216, + "architecture_id": "MixFormerSequentialForCausalLM", + "total_models": 1, + "total_downloads": 562, + "min_param_count": 2779683840, "sample_models": [ - "allenai/Flex-reddit-2x7B-1T", - "allenai/FlexOlmo-7x7B-1T-RT", - "shanearora/Flex-reddit-2x7B-1T" + "SkunkworksAI/phi-2" ], - "relevancy_score": 31.1 + "relevancy_score": 30.5 }, { - "architecture_id": "ChatGLMModel", + "architecture_id": "Phi3SmallForCausalLM", "total_models": 2, - "total_downloads": 17362, - "min_param_count": 9399951392, + "total_downloads": 15892, + "min_param_count": 7392272384, "sample_models": [ - "zai-org/codegeex4-all-9b", - "zai-org/glm-4-9b" + "microsoft/Phi-3-small-8k-instruct", + "microsoft/Phi-3-small-128k-instruct" ], - "relevancy_score": 31.1 + "relevancy_score": 30.4 }, { "architecture_id": "SpatialLMLlamaForCausalLM", "total_models": 1, - "total_downloads": 578, - "min_param_count": 1345883776, + "total_downloads": 515, + "min_param_count": 1247355840, "sample_models": [ - "manycore-research/SpatialLM1.1-Llama-1B" + "manycore-research/SpatialLM-Llama-1B" ], - "relevancy_score": 30.9 + "relevancy_score": 30.3 }, { - "architecture_id": "MixFormerSequentialForCausalLM", + "architecture_id": "CambrianQwenForCausalLM", "total_models": 1, - "total_downloads": 561, - "min_param_count": 2779683840, + "total_downloads": 2935, + "min_param_count": 3986951616, "sample_models": [ - "SkunkworksAI/phi-2" + "nyu-visionx/Scale-RAE-Qwen1.5B_DiT2.4B" ], - "relevancy_score": 30.8 + "relevancy_score": 30.1 }, { "architecture_id": "StripedHyenaModelForCausalLM", "total_models": 3, - "total_downloads": 10515, + "total_downloads": 9917, "min_param_count": 7646024704, "sample_models": [ "togethercomputer/evo-1-131k-base", "togethercomputer/evo-1-8k-base", "togethercomputer/StripedHyena-Nous-7B" ], - "relevancy_score": 30.7 + "relevancy_score": 30.0 }, { "architecture_id": "Maira2ForConditionalGeneration", "total_models": 1, - 
"total_downloads": 2914, + "total_downloads": 2692, "min_param_count": 6880185600, "sample_models": [ "microsoft/maira-2" ], - "relevancy_score": 30.5 + "relevancy_score": 29.9 + }, + { + "architecture_id": "CheXagentForCausalLM", + "total_models": 1, + "total_downloads": 2648, + "min_param_count": 3140746752, + "sample_models": [ + "StanfordAIMI/CheXagent-2-3b" + ], + "relevancy_score": 29.9 }, { "architecture_id": "BioGptForCausalLM", "total_models": 5, - "total_downloads": 179801, + "total_downloads": 174528, "min_param_count": null, "sample_models": [ "microsoft/biogpt", @@ -2420,194 +2455,211 @@ "hf-tiny-model-private/tiny-random-BioGptForCausalLM", "zequnl/molxpt" ], - "relevancy_score": 30.4 + "relevancy_score": 29.5 }, { - "architecture_id": "Ernie4_5_MoeForCausalLM", - "total_models": 4, - "total_downloads": 39215, - "min_param_count": 21825437888, + "architecture_id": "InternVLChatModel", + "total_models": 1, + "total_downloads": 2229, + "min_param_count": 3712637952, "sample_models": [ - "baidu/ERNIE-4.5-21B-A3B-PT", - "baidu/ERNIE-4.5-21B-A3B-Base-PT", - "baidu/ERNIE-4.5-21B-A3B-Thinking", - "baidu/ERNIE-4.5-300B-A47B-PT" + "numind/NuExtract-2-4B-experimental" ], - "relevancy_score": 30.3 + "relevancy_score": 29.5 }, { - "architecture_id": "BailingMoeV2ForCausalLM", - "total_models": 5, - "total_downloads": 20571, - "min_param_count": 16255643392, + "architecture_id": "MatriochkaForCausalLM", + "total_models": 1, + "total_downloads": 2159, + "min_param_count": 3358735360, "sample_models": [ - "inclusionAI/Ling-mini-2.0", - "inclusionAI/Ling-1T", - "inclusionAI/Ring-mini-2.0", - "inclusionAI/Ling-flash-2.0", - "inclusionAI/Ling-flash-base-2.0" + "nthngdy/matryoshka-3B" + ], + "relevancy_score": 29.4 + }, + { + "architecture_id": "SolarOpenForCausalLM", + "total_models": 2, + "total_downloads": 343068, + "min_param_count": null, + "sample_models": [ + "upstage/Solar-Open-100B", + "nota-ai/Solar-Open-100B-NotaMoEQuant-Int4" ], - "relevancy_score": 29.5 + 
"relevancy_score": 29.1 }, { "architecture_id": "LatentMoELLaVAPhiForCausalLM", "total_models": 1, - "total_downloads": 1728, + "total_downloads": 1792, "min_param_count": 3093139456, "sample_models": [ "KKHYA/llavaphi2-2.7b-finetune-latent-sparse-moe-4e-2k-freeze-1.0_20260304_075653" ], - "relevancy_score": 29.3 + "relevancy_score": 29.0 }, { - "architecture_id": "SolarOpenForCausalLM", - "total_models": 2, - "total_downloads": 264805, - "min_param_count": null, + "architecture_id": "LlamaForCasualLM", + "total_models": 1, + "total_downloads": 1613, + "min_param_count": 3212749824, "sample_models": [ - "upstage/Solar-Open-100B", - "nota-ai/Solar-Open-100B-NotaMoEQuant-Int4" + "CoRover/BharatGPT-3B-Indic" ], - "relevancy_score": 29.2 + "relevancy_score": 28.8 }, { "architecture_id": "Qwen2ForSequenceClassification", "total_models": 2, - "total_downloads": 7086, + "total_downloads": 7132, "min_param_count": 7070622720, "sample_models": [ "nvidia/AceMath-7B-RM", "nvidia/Qwen2.5-CascadeRL-RM-72B" ], - "relevancy_score": 29.1 - }, - { - "architecture_id": "MatriochkaForCausalLM", - "total_models": 1, - "total_downloads": 1562, - "min_param_count": 3358735360, - "sample_models": [ - "nthngdy/matryoshka-3B" - ], - "relevancy_score": 29.1 + "relevancy_score": 28.6 }, { "architecture_id": "DeepseekForCausalLM", "total_models": 2, - "total_downloads": 40954, + "total_downloads": 41625, "min_param_count": 16375728128, "sample_models": [ "deepseek-ai/deepseek-moe-16b-base", "deepseek-ai/deepseek-moe-16b-chat" ], - "relevancy_score": 29.0 + "relevancy_score": 28.5 + }, + { + "architecture_id": "MobilintLlamaForCausalLM", + "total_models": 9, + "total_downloads": 31316, + "min_param_count": null, + "sample_models": [ + "mobilint/Llama-3.2-1B-Instruct", + "mobilint/Llama-3.2-3B-Instruct", + "mobilint/Llama-3.1-8B-Instruct", + "mobilint/HyperCLOVAX-SEED-Text-Instruct-1.5B", + "mobilint/Llama-3.2-1B-Instruct-Batch16", + "mobilint/Llama-3.1-8B-Instruct-Batch16", + 
"mobilint/Llama-3.2-3B-Instruct-Batch16", + "mobilint/Llama-3.2-3B-Instruct-Batch32", + "mobilint/Llama-3.1-8B-Instruct-Batch32" + ], + "relevancy_score": 28.3 }, { "architecture_id": "Jais2ForCausalLM", "total_models": 2, - "total_downloads": 6256, + "total_downloads": 6100, "min_param_count": 8090401280, "sample_models": [ "inceptionai/Jais-2-8B-Chat", "inceptionai/Jais-2-70B-Chat" ], - "relevancy_score": 28.9 + "relevancy_score": 28.3 }, { - "architecture_id": "ChatGLMForConditionalGeneration", - "total_models": 2, - "total_downloads": 4858, - "min_param_count": 9399951392, + "architecture_id": "BailingMoeV2ForCausalLM", + "total_models": 4, + "total_downloads": 19076, + "min_param_count": 16255643392, "sample_models": [ - "IAAR-Shanghai/xVerify-9B-C", - "qiuhuachuan/MeChat" + "inclusionAI/Ling-mini-2.0", + "inclusionAI/Ling-1T", + "inclusionAI/Ling-flash-2.0", + "inclusionAI/Ling-flash-base-2.0" ], - "relevancy_score": 28.3 + "relevancy_score": 28.0 }, { - "architecture_id": "ReformerModelWithLMHead", + "architecture_id": "ChatGLMForConditionalGeneration", "total_models": 2, - "total_downloads": 159282, - "min_param_count": null, + "total_downloads": 4797, + "min_param_count": 9399951392, "sample_models": [ - "google/reformer-crime-and-punishment", - "google/reformer-enwik8" + "IAAR-Shanghai/xVerify-9B-C", + "qiuhuachuan/MeChat" ], - "relevancy_score": 28.1 + "relevancy_score": 27.8 }, { "architecture_id": "LamedPhi3ForCausalLM", "total_models": 1, - "total_downloads": 985, + "total_downloads": 975, "min_param_count": 4049101904, "sample_models": [ "GoodBaiBai88/M3D-LaMed-Phi-3-4B" ], - "relevancy_score": 28.1 + "relevancy_score": 27.7 }, { - "architecture_id": "SarvamMLAForCausalLM", - "total_models": 2, - "total_downloads": 152436, - "min_param_count": 55732545631, + "architecture_id": "Gemma4TextModel", + "total_models": 1, + "total_downloads": 967, + "min_param_count": 4647449856, "sample_models": [ - "aoxo/sarvam-105b-uncensored", - "sarvamai/sarvam-105b" 
+ "bRadu/gemma-4-E2B-it-textonly" ], - "relevancy_score": 28.0 + "relevancy_score": 27.7 }, { "architecture_id": "WeDLMForCausalLM", "total_models": 2, - "total_downloads": 4256, + "total_downloads": 4219, "min_param_count": 8190735360, "sample_models": [ "tencent/WeDLM-8B-Base", "tencent/WeDLM-8B-Instruct" ], - "relevancy_score": 28.0 + "relevancy_score": 27.5 }, { - "architecture_id": "SarvamMoEForCausalLM", + "architecture_id": "SarvamMLAForCausalLM", "total_models": 2, - "total_downloads": 149370, - "min_param_count": 32152650368, + "total_downloads": 151877, + "min_param_count": 55732545631, "sample_models": [ - "aoxo/sarvam-30b-uncensored", - "sarvamai/sarvam-30b" + "aoxo/sarvam-105b-uncensored", + "sarvamai/sarvam-105b" + ], + "relevancy_score": 27.3 + }, + { + "architecture_id": "ReformerModelWithLMHead", + "total_models": 2, + "total_downloads": 150609, + "min_param_count": null, + "sample_models": [ + "google/reformer-crime-and-punishment", + "google/reformer-enwik8" ], - "relevancy_score": 27.9 + "relevancy_score": 27.3 }, { "architecture_id": "HyperCLOVAXForCausalLM", "total_models": 1, - "total_downloads": 31787, + "total_downloads": 31859, "min_param_count": 14748112896, "sample_models": [ "naver-hyperclovax/HyperCLOVAX-SEED-Think-14B" ], - "relevancy_score": 27.8 + "relevancy_score": 27.3 }, { - "architecture_id": "MobilintLlamaForCausalLM", - "total_models": 7, - "total_downloads": 29407, - "min_param_count": null, + "architecture_id": "LongLlamaForCausalLM", + "total_models": 1, + "total_downloads": 780, + "min_param_count": 3426474900, "sample_models": [ - "mobilint/Llama-3.2-3B-Instruct", - "mobilint/Llama-3.2-1B-Instruct", - "mobilint/Llama-3.1-8B-Instruct", - "mobilint/HyperCLOVAX-SEED-Text-Instruct-1.5B", - "mobilint/Llama-3.2-1B-Instruct-Batch16", - "mobilint/Llama-3.1-8B-Instruct-Batch16", - "mobilint/Llama-3.2-3B-Instruct-Batch16" + "syzymon/long_llama_3b" ], - "relevancy_score": 27.7 + "relevancy_score": 27.2 }, { "architecture_id": 
"InternLMForCausalLM", "total_models": 4, - "total_downloads": 72325, + "total_downloads": 71468, "min_param_count": null, "sample_models": [ "internlm/internlm-chat-7b", @@ -2615,123 +2667,124 @@ "internlm/internlm-7b", "internlm/internlm-chat-20b" ], - "relevancy_score": 27.7 - }, - { - "architecture_id": "LongLlamaForCausalLM", - "total_models": 1, - "total_downloads": 756, - "min_param_count": 3426474900, - "sample_models": [ - "syzymon/long_llama_3b" - ], - "relevancy_score": 27.5 + "relevancy_score": 26.9 }, { - "architecture_id": "GPTNeoXJapaneseForCausalLM", + "architecture_id": "SarvamMoEForCausalLM", "total_models": 2, - "total_downloads": 113485, - "min_param_count": null, + "total_downloads": 123774, + "min_param_count": 32152650368, "sample_models": [ - "abeja/gpt-neox-japanese-2.7b", - "hf-tiny-model-private/tiny-random-GPTNeoXJapaneseForCausalLM" + "aoxo/sarvam-30b-uncensored", + "sarvamai/sarvam-30b" ], - "relevancy_score": 27.3 + "relevancy_score": 26.9 }, { - "architecture_id": "SparseLlamaForCausalLM", + "architecture_id": "ZambaForCausalLM", "total_models": 1, - "total_downloads": 4221, - "min_param_count": 8185270336, + "total_downloads": 4128, + "min_param_count": 7232490496, "sample_models": [ - "openbmb/NOSA-8B" + "Zyphra/Zamba-7B-v1" ], - "relevancy_score": 27.3 + "relevancy_score": 26.8 }, { - "architecture_id": "ZambaForCausalLM", - "total_models": 1, - "total_downloads": 4140, - "min_param_count": 7232490496, + "architecture_id": "GPTNeoXJapaneseForCausalLM", + "total_models": 2, + "total_downloads": 110049, + "min_param_count": null, "sample_models": [ - "Zyphra/Zamba-7B-v1" + "abeja/gpt-neox-japanese-2.7b", + "hf-tiny-model-private/tiny-random-GPTNeoXJapaneseForCausalLM" ], - "relevancy_score": 27.3 + "relevancy_score": 26.6 }, { - "architecture_id": "Gemma4TextModel", + "architecture_id": "JetMoEForCausalLM", "total_models": 1, - "total_downloads": 650, - "min_param_count": 4647449856, + "total_downloads": 3627, + "min_param_count": 
8522237952, "sample_models": [ - "bRadu/gemma-4-E2B-it-textonly" + "jetmoe/jetmoe-8b" ], - "relevancy_score": 27.1 + "relevancy_score": 26.5 }, { - "architecture_id": "BailingMoeForCausalLM", + "architecture_id": "CXRMate2ForConditionalGeneration", "total_models": 1, - "total_downloads": 21607, - "min_param_count": 16801974272, + "total_downloads": 572, + "min_param_count": 3322260224, "sample_models": [ - "inclusionAI/Ling-lite-1.5" + "aehrc/cxrmate-2" ], - "relevancy_score": 27.0 + "relevancy_score": 26.5 }, { - "architecture_id": "JetMoEForCausalLM", + "architecture_id": "BunnyPhiForCausalLM", "total_models": 1, - "total_downloads": 3665, - "min_param_count": 8522237952, + "total_downloads": 564, + "min_param_count": 3182254624, "sample_models": [ - "jetmoe/jetmoe-8b" + "BAAI/Bunny-v1_0-3B" ], - "relevancy_score": 27.0 + "relevancy_score": 26.5 }, { "architecture_id": "Step3p5ForCausalLM", "total_models": 1, - "total_downloads": 123608, + "total_downloads": 133597, "min_param_count": 199384301376, "sample_models": [ "stepfun-ai/Step-3.5-Flash" ], - "relevancy_score": 26.8 + "relevancy_score": 26.4 }, { - "architecture_id": "CXRMate2ForConditionalGeneration", + "architecture_id": "BailingMoeForCausalLM", "total_models": 1, - "total_downloads": 560, - "min_param_count": 3322260224, + "total_downloads": 20878, + "min_param_count": 16801974272, "sample_models": [ - "aehrc/cxrmate-2" + "inclusionAI/Ling-lite-1.5" ], - "relevancy_score": 26.8 + "relevancy_score": 26.4 }, { - "architecture_id": "BunnyPhiForCausalLM", + "architecture_id": "SparseLlamaForCausalLM", "total_models": 1, - "total_downloads": 507, - "min_param_count": 3182254624, + "total_downloads": 3131, + "min_param_count": 8185270336, "sample_models": [ - "BAAI/Bunny-v1_0-3B" + "openbmb/NOSA-8B" ], - "relevancy_score": 26.6 + "relevancy_score": 26.2 }, { "architecture_id": "Esm2LlamaInstructForCausalLM", "total_models": 1, - "total_downloads": 2603, + "total_downloads": 2589, "min_param_count": 
10878983201, "sample_models": [ "xiao-fei/Prot2Text-V2-11B-Instruct-hf" ], - "relevancy_score": 26.2 + "relevancy_score": 25.8 + }, + { + "architecture_id": "Qwen2VLAudioForConditionalGeneration", + "total_models": 1, + "total_downloads": 2177, + "min_param_count": 8932935680, + "sample_models": [ + "MayaKD/qwen2-vl-audio" + ], + "relevancy_score": 25.4 }, { "architecture_id": "OuroForCausalLM", "total_models": 4, - "total_downloads": 34326, + "total_downloads": 34507, "min_param_count": null, "sample_models": [ "ByteDance/Ouro-1.4B", @@ -2739,54 +2792,53 @@ "ByteDance/Ouro-2.6B", "ByteDance/Ouro-1.4B-Thinking" ], - "relevancy_score": 26.0 + "relevancy_score": 25.3 }, { - "architecture_id": "StableDiffcoderForCausalLM", - "total_models": 2, - "total_downloads": 1719, - "min_param_count": 8250462208, + "architecture_id": "FP8Qwen3ForCausalLM", + "total_models": 1, + "total_downloads": 1945, + "min_param_count": 8190735360, "sample_models": [ - "ByteDance-Seed/Stable-DiffCoder-8B-Instruct", - "ByteDance-Seed/Stable-DiffCoder-8B-Base" + "xihc-ucb/Qwen3-8B-Base-train-Quasar-0809" ], - "relevancy_score": 26.0 + "relevancy_score": 25.2 }, { - "architecture_id": "Qwen2VLAudioForConditionalGeneration", + "architecture_id": "CheXagentForConditionalGeneration", "total_models": 1, - "total_downloads": 2174, - "min_param_count": 8932935680, + "total_downloads": 1878, + "min_param_count": 8362401664, "sample_models": [ - "MayaKD/qwen2-vl-audio" + "StanfordAIMI/CheXagent-8b" ], - "relevancy_score": 25.8 + "relevancy_score": 25.1 + }, + { + "architecture_id": "FP8Qwen2ForCausalLM", + "total_models": 1, + "total_downloads": 1781, + "min_param_count": 7615616512, + "sample_models": [ + "xihc-ucb/Qwen2.5-7B-train-Quasar-1214" + ], + "relevancy_score": 25.0 }, { "architecture_id": "BaiChuanForCausalLM", "total_models": 2, - "total_downloads": 51261, + "total_downloads": 50672, "min_param_count": null, "sample_models": [ "baichuan-inc/Baichuan-7B", "FreedomIntelligence/HuatuoGPT-7B" 
], - "relevancy_score": 25.6 - }, - { - "architecture_id": "FP8Qwen3ForCausalLM", - "total_models": 1, - "total_downloads": 1941, - "min_param_count": 8190735360, - "sample_models": [ - "xihc-ucb/Qwen3-8B-Base-train-Quasar-0809" - ], - "relevancy_score": 25.6 + "relevancy_score": 24.9 }, { "architecture_id": "MobilintQwen2ForCausalLM", "total_models": 4, - "total_downloads": 27152, + "total_downloads": 27256, "min_param_count": null, "sample_models": [ "mobilint/Qwen2.5-0.5B-Instruct", @@ -2794,97 +2846,75 @@ "mobilint/Qwen2.5-3B-Instruct", "mobilint/Qwen2.5-7B-Instruct" ], - "relevancy_score": 25.5 - }, - { - "architecture_id": "MobilintQwen3ForCausalLM", - "total_models": 4, - "total_downloads": 25718, - "min_param_count": null, - "sample_models": [ - "mobilint/Qwen3-0.6B", - "mobilint/Qwen3-1.7B", - "mobilint/Qwen3-4B", - "mobilint/Qwen3-8B" - ], - "relevancy_score": 25.4 + "relevancy_score": 24.8 }, { - "architecture_id": "HCXVisionForCausalLM", + "architecture_id": "KORMoForCausalLM", "total_models": 1, - "total_downloads": 64666, - "min_param_count": null, + "total_downloads": 1616, + "min_param_count": 10756624384, "sample_models": [ - "naver-hyperclovax/HyperCLOVAX-SEED-Vision-Instruct-3B" + "KORMo-Team/KORMo-10B-sft" ], - "relevancy_score": 25.4 + "relevancy_score": 24.8 }, { - "architecture_id": "FP8Qwen2ForCausalLM", - "total_models": 1, - "total_downloads": 1774, - "min_param_count": 7615616512, + "architecture_id": "MobilintQwen3ForCausalLM", + "total_models": 4, + "total_downloads": 25791, + "min_param_count": null, "sample_models": [ - "xihc-ucb/Qwen2.5-7B-train-Quasar-1214" + "mobilint/Qwen3-0.6B", + "mobilint/Qwen3-1.7B", + "mobilint/Qwen3-4B", + "mobilint/Qwen3-8B" ], - "relevancy_score": 25.4 + "relevancy_score": 24.7 }, { - "architecture_id": "CheXagentForConditionalGeneration", + "architecture_id": "MiMoV2FlashForCausalLM", "total_models": 1, - "total_downloads": 1766, - "min_param_count": 8362401664, + "total_downloads": 61449, + 
"min_param_count": 309785318400, "sample_models": [ - "StanfordAIMI/CheXagent-8b" + "XiaomiMiMo/MiMo-V2-Flash" ], - "relevancy_score": 25.4 + "relevancy_score": 24.7 }, { "architecture_id": "KimiLinearForCausalLM", "total_models": 1, - "total_downloads": 60910, + "total_downloads": 61051, "min_param_count": 49122681728, "sample_models": [ "moonshotai/Kimi-Linear-48B-A3B-Instruct" ], - "relevancy_score": 25.3 + "relevancy_score": 24.7 }, { - "architecture_id": "MiMoV2FlashForCausalLM", + "architecture_id": "HCXVisionForCausalLM", "total_models": 1, - "total_downloads": 58903, - "min_param_count": 309785318400, - "sample_models": [ - "XiaomiMiMo/MiMo-V2-Flash" - ], - "relevancy_score": 25.2 - }, - { - "architecture_id": "SeedOssForCausalLM", - "total_models": 3, - "total_downloads": 30541, + "total_downloads": 60376, "min_param_count": null, "sample_models": [ - "ByteDance-Seed/Seed-OSS-36B-Instruct", - "NousResearch/Hermes-4.3-36B", - "ByteDance-Seed/Seed-OSS-36B-Base" + "naver-hyperclovax/HyperCLOVAX-SEED-Vision-Instruct-3B" ], - "relevancy_score": 25.1 + "relevancy_score": 24.7 }, { - "architecture_id": "KORMoForCausalLM", + "architecture_id": "Param2MoEForCausalLM", "total_models": 1, - "total_downloads": 1512, - "min_param_count": 10756624384, + "total_downloads": 8281, + "min_param_count": 17151140480, "sample_models": [ - "KORMo-Team/KORMo-10B-sft" + "bharatgenai/Param2-17B-A2.4B-Thinking" ], - "relevancy_score": 25.0 + "relevancy_score": 24.3 }, { "architecture_id": "MobilintExaoneForCausalLM", "total_models": 4, - "total_downloads": 20678, + "total_downloads": 20759, "min_param_count": null, "sample_models": [ "mobilint/EXAONE-Deep-2.4B", @@ -2892,226 +2922,286 @@ "mobilint/EXAONE-3.5-7.8B-Instruct", "mobilint/EXAONE-Deep-7.8B" ], - "relevancy_score": 24.9 + "relevancy_score": 24.2 + }, + { + "architecture_id": "CogVLMForCausalLM", + "total_models": 2, + "total_downloads": 5861, + "min_param_count": 17639687424, + "sample_models": [ + 
"zai-org/cogvlm2-llama3-chat-19B", + "zai-org/cogvlm-chat-hf" + ], + "relevancy_score": 24.2 }, { "architecture_id": "MiniCPMSALAForCausalLM", "total_models": 1, - "total_downloads": 1403, + "total_downloads": 1254, "min_param_count": 9477203968, "sample_models": [ "openbmb/MiniCPM-SALA" ], - "relevancy_score": 24.8 + "relevancy_score": 24.2 }, { - "architecture_id": "CogVLMForCausalLM", - "total_models": 2, - "total_downloads": 5474, - "min_param_count": 17639687424, + "architecture_id": "Emu3ForCausalLM", + "total_models": 1, + "total_downloads": 1228, + "min_param_count": 8492011520, "sample_models": [ - "zai-org/cogvlm2-llama3-chat-19B", - "zai-org/cogvlm-chat-hf" + "BAAI/Emu3-Chat" ], - "relevancy_score": 24.6 + "relevancy_score": 24.2 }, { - "architecture_id": "LongcatFlashForCausalLM", + "architecture_id": "BunnyLlamaForCausalLM", "total_models": 1, - "total_downloads": 45650, - "min_param_count": 561862880256, + "total_downloads": 1195, + "min_param_count": 8479990848, "sample_models": [ - "meituan-longcat/LongCat-Flash-Chat" + "typhoon-ai/llama-3-typhoon-v1.5-8b-vision-preview" ], - "relevancy_score": 24.6 + "relevancy_score": 24.1 }, { - "architecture_id": "TrillionForCausalLM", + "architecture_id": "LongcatFlashForCausalLM", "total_models": 1, - "total_downloads": 7481, - "min_param_count": 20725519360, + "total_downloads": 43995, + "min_param_count": 561862880256, "sample_models": [ - "trillionlabs/Tri-21B-Think" + "meituan-longcat/LongCat-Flash-Chat" ], - "relevancy_score": 24.6 + "relevancy_score": 24.0 }, { "architecture_id": "InternLM3ForCausalLM", "total_models": 1, - "total_downloads": 43407, + "total_downloads": 43400, "min_param_count": null, "sample_models": [ "internlm/internlm3-8b-instruct" ], - "relevancy_score": 24.5 - }, - { - "architecture_id": "Param2MoEForCausalLM", - "total_models": 1, - "total_downloads": 7230, - "min_param_count": 17151140480, - "sample_models": [ - "bharatgenai/Param2-17B-A2.4B-Thinking" - ], - "relevancy_score": 
24.5 + "relevancy_score": 24.0 }, { "architecture_id": "SteerlingForCausalLM", "total_models": 1, - "total_downloads": 1203, + "total_downloads": 1099, "min_param_count": 8391778304, "sample_models": [ "guidelabs/steerling-8b" ], - "relevancy_score": 24.5 + "relevancy_score": 23.9 }, { - "architecture_id": "Emu3ForCausalLM", + "architecture_id": "ExaoneMoEForCausalLM", "total_models": 1, - "total_downloads": 1181, - "min_param_count": 8492011520, + "total_downloads": 37191, + "min_param_count": 237099669632, "sample_models": [ - "BAAI/Emu3-Chat" + "LGAI-EXAONE/K-EXAONE-236B-A23B" ], - "relevancy_score": 24.5 + "relevancy_score": 23.6 }, { - "architecture_id": "BunnyLlamaForCausalLM", + "architecture_id": "StableDiffcoderForCausalLM", "total_models": 1, - "total_downloads": 1149, - "min_param_count": 8479990848, + "total_downloads": 871, + "min_param_count": 8250462208, "sample_models": [ - "typhoon-ai/llama-3-typhoon-v1.5-8b-vision-preview" + "ByteDance-Seed/Stable-DiffCoder-8B-Instruct" ], - "relevancy_score": 24.4 + "relevancy_score": 23.4 }, { "architecture_id": "MiniMaxM1ForCausalLM", "total_models": 2, - "total_downloads": 24236, + "total_downloads": 23252, "min_param_count": null, "sample_models": [ "MiniMaxAI/MiniMax-M1-40k", "MiniMaxAI/MiniMax-M1-80k" ], - "relevancy_score": 23.9 + "relevancy_score": 23.2 }, { "architecture_id": "ICONNForCausalLM", "total_models": 1, - "total_downloads": 903, + "total_downloads": 745, "min_param_count": 7833409536, "sample_models": [ "ICONNAI/ICONN-1-Mini-Beta" ], - "relevancy_score": 23.9 + "relevancy_score": 23.1 }, { "architecture_id": "Qwen2VLForConditionalGeneration", "total_models": 1, - "total_downloads": 770, + "total_downloads": 726, "min_param_count": 8291375616, "sample_models": [ "typhoon-ai/typhoon2-qwen2vl-7b-vision-instruct" ], - "relevancy_score": 23.5 + "relevancy_score": 23.0 }, { - "architecture_id": "BailingMoeV2_5ForCausalLM", + "architecture_id": "Qwen2Model", "total_models": 1, - "total_downloads": 
24780, - "min_param_count": 1012474606720, + "total_downloads": 681, + "min_param_count": 7070619136, "sample_models": [ - "inclusionAI/Ring-2.5-1T" + "NewBeeKing/MemPO_Qwen2.5-SFT-RL" ], - "relevancy_score": 23.3 + "relevancy_score": 22.9 }, { - "architecture_id": "ExaoneMoEForCausalLM", + "architecture_id": "LLaDAMoEModel", "total_models": 1, - "total_downloads": 24437, - "min_param_count": 237099669632, + "total_downloads": 665, + "min_param_count": 7356880896, "sample_models": [ - "LGAI-EXAONE/K-EXAONE-236B-A23B" + "inclusionAI/LLaDA-MoE-7B-A1B-Base" ], - "relevancy_score": 23.2 + "relevancy_score": 22.8 + }, + { + "architecture_id": "Gemma4ForCausalLM", + "total_models": 1, + "total_downloads": 664, + "min_param_count": 7518069034, + "sample_models": [ + "aqweteddy/gemma-4-E4B-it-text" + ], + "relevancy_score": 22.8 + }, + { + "architecture_id": "BailingMoeV2_5ForCausalLM", + "total_models": 1, + "total_downloads": 24448, + "min_param_count": 1012474606720, + "sample_models": [ + "inclusionAI/Ring-2.5-1T" + ], + "relevancy_score": 22.7 }, { "architecture_id": "CogVLMVideoForCausalLM", "total_models": 1, - "total_downloads": 681, + "total_downloads": 622, "min_param_count": 12507532544, "sample_models": [ "zai-org/VisionReward-Video" ], - "relevancy_score": 23.2 + "relevancy_score": 22.7 }, { "architecture_id": "Ernie4_5ForCausalLM", "total_models": 2, - "total_downloads": 17478, + "total_downloads": 17079, "min_param_count": null, "sample_models": [ "baidu/ERNIE-4.5-0.3B-PT", "baidu/ERNIE-4.5-0.3B-Base-PT" ], - "relevancy_score": 23.1 + "relevancy_score": 22.6 }, { "architecture_id": "CLIPT5ForConditionalGeneration", "total_models": 2, - "total_downloads": 16500, + "total_downloads": 17282, "min_param_count": null, "sample_models": [ "zhiqiulin/clip-flant5-xl", "zhiqiulin/clip-flant5-xxl" ], - "relevancy_score": 23.0 + "relevancy_score": 22.6 + }, + { + "architecture_id": "CodeShellForCausalLM", + "total_models": 1, + "total_downloads": 610, + 
"min_param_count": 7688051328, + "sample_models": [ + "WisdomShell/CodeShell-7B" + ], + "relevancy_score": 22.6 + }, + { + "architecture_id": "SolarForCausalLM", + "total_models": 1, + "total_downloads": 21092, + "min_param_count": null, + "sample_models": [ + "upstage/solar-pro-preview-instruct" + ], + "relevancy_score": 22.4 }, { "architecture_id": "Grok1ModelForCausalLM", "total_models": 1, - "total_downloads": 21640, + "total_downloads": 20827, "min_param_count": null, "sample_models": [ "hpcai-tech/grok-1" ], - "relevancy_score": 23.0 + "relevancy_score": 22.4 }, { - "architecture_id": "CodeShellForCausalLM", + "architecture_id": "InternLM2ForCausalLM", "total_models": 1, - "total_downloads": 614, - "min_param_count": 7688051328, + "total_downloads": 513, + "min_param_count": 7737708544, "sample_models": [ - "WisdomShell/CodeShell-7B" + "AI4Chem/ChemLLM-7B-Chat" ], - "relevancy_score": 23.0 + "relevancy_score": 22.3 }, { - "architecture_id": "SolarForCausalLM", + "architecture_id": "GptOssPuzzleForCausalLM", "total_models": 1, - "total_downloads": 21294, - "min_param_count": null, + "total_downloads": 18998, + "min_param_count": 90837823680, "sample_models": [ - "upstage/solar-pro-preview-instruct" + "nvidia/gpt-oss-puzzle-88B" ], - "relevancy_score": 22.9 + "relevancy_score": 22.2 }, { - "architecture_id": "LLaDAMoEModel", + "architecture_id": "TrillionForCausalLM", "total_models": 1, - "total_downloads": 586, - "min_param_count": 7356880896, + "total_downloads": 3124, + "min_param_count": 20725519360, "sample_models": [ - "inclusionAI/LLaDA-MoE-7B-A1B-Base" + "trillionlabs/Tri-21B-Think" ], - "relevancy_score": 22.9 + "relevancy_score": 22.2 + }, + { + "architecture_id": "RecaLLMLlamaForCausalLM", + "total_models": 1, + "total_downloads": 506, + "min_param_count": 8030294016, + "sample_models": [ + "kswhitecross/RecaLLM-Llama-3.1-8B" + ], + "relevancy_score": 22.2 + }, + { + "architecture_id": "CohereForCausalLM", + "total_models": 1, + "total_downloads": 
504, + "min_param_count": 8028033024, + "sample_models": [ + "Yousefbahr/Turjman-Cold-Start" + ], + "relevancy_score": 22.2 }, { "architecture_id": "LISAForCausalLM", "total_models": 5, - "total_downloads": 5532, + "total_downloads": 5924, "min_param_count": null, "sample_models": [ "xinlai/LISA-13B-llama2-v1", @@ -3120,1317 +3210,1350 @@ "xinlai/LISA-13B-llama2-v1-explanatory", "MBZUAI/GLaMM-GranD-Pretrained" ], - "relevancy_score": 22.6 + "relevancy_score": 22.1 }, { "architecture_id": "Qwen2_5_VLForConditionalGeneration", "total_models": 3, - "total_downloads": 10261, + "total_downloads": 10615, "min_param_count": null, "sample_models": [ "OmniSVG/OmniSVG1.1_4B", "OmniSVG/OmniSVG1.1_8B", "OmniSVG/OmniSVG" ], - "relevancy_score": 22.6 + "relevancy_score": 22.1 + }, + { + "architecture_id": "OutlierMoEForCausalLM", + "total_models": 3, + "total_downloads": 1690, + "min_param_count": 22813220976, + "sample_models": [ + "Outlier-Ai/Outlier-40B-V3.2", + "Outlier-Ai/Outlier-10B-V3.2", + "Outlier-Ai/Outlier-70B-V3.2" + ], + "relevancy_score": 22.1 }, { "architecture_id": "OrionForCausalLM", "total_models": 2, - "total_downloads": 13834, + "total_downloads": 13391, "min_param_count": null, "sample_models": [ "OrionStarAI/Orion-14B-Chat", "OrionStarAI/Orion-14B-Base" ], - "relevancy_score": 22.6 - }, - { - "architecture_id": "GptOssPuzzleForCausalLM", - "total_models": 1, - "total_downloads": 17281, - "min_param_count": 90837823680, - "sample_models": [ - "nvidia/gpt-oss-puzzle-88B" - ], - "relevancy_score": 22.5 + "relevancy_score": 22.0 }, { "architecture_id": "HunYuanMoEV1ForCausalLM", "total_models": 1, - "total_downloads": 16793, + "total_downloads": 15552, "min_param_count": null, "sample_models": [ "tencent/Hunyuan-A13B-Instruct" ], - "relevancy_score": 22.4 + "relevancy_score": 21.7 }, { - "architecture_id": "Dots1ForCausalLM", + "architecture_id": "GravityMoEForCausalLM", "total_models": 2, - "total_downloads": 10490, - "min_param_count": 142774381696, + 
"total_downloads": 1655, + "min_param_count": 16242181824, "sample_models": [ - "rednote-hilab/dots.llm1.inst", - "rednote-hilab/dots.llm1.base" + "learning-unit/L1-16B-A3B", + "trillionlabs/Gravity-16B-A3B-Base" ], - "relevancy_score": 22.0 + "relevancy_score": 21.5 }, { "architecture_id": "MiniCPM3ForCausalLM", "total_models": 1, - "total_downloads": 14420, + "total_downloads": 14024, "min_param_count": null, "sample_models": [ "openbmb/MiniCPM3-4B" ], - "relevancy_score": 22.0 + "relevancy_score": 21.5 }, { - "architecture_id": "IQuestLoopCoderForCausalLM", + "architecture_id": "ArcticForCausalLM", "total_models": 1, - "total_downloads": 14278, - "min_param_count": 39794696320, + "total_downloads": 13989, + "min_param_count": null, "sample_models": [ - "IQuestLab/IQuest-Coder-V1-40B-Loop-Instruct" + "Snowflake/snowflake-arctic-instruct" ], - "relevancy_score": 22.0 + "relevancy_score": 21.5 }, { - "architecture_id": "ArcticForCausalLM", - "total_models": 1, - "total_downloads": 14245, - "min_param_count": null, + "architecture_id": "Dots1ForCausalLM", + "total_models": 2, + "total_downloads": 10143, + "min_param_count": 142774381696, "sample_models": [ - "Snowflake/snowflake-arctic-instruct" + "rednote-hilab/dots.llm1.inst", + "rednote-hilab/dots.llm1.base" ], - "relevancy_score": 22.0 + "relevancy_score": 21.4 }, { - "architecture_id": "LlavaLlamaModel", - "total_models": 4, - "total_downloads": 5317, - "min_param_count": null, + "architecture_id": "IQuestLoopCoderForCausalLM", + "total_models": 1, + "total_downloads": 13700, + "min_param_count": 39794696320, "sample_models": [ - "Efficient-Large-Model/VILA1.5-3b", - "Efficient-Large-Model/NVILA-8B", - "Efficient-Large-Model/VILA1.5-13b", - "Efficient-Large-Model/NVILA-Lite-8B" + "IQuestLab/IQuest-Coder-V1-40B-Loop-Instruct" ], - "relevancy_score": 21.8 + "relevancy_score": 21.4 }, { "architecture_id": "GPT2LMHeadCustomModel", "total_models": 2, - "total_downloads": 8956, + "total_downloads": 8718, 
"min_param_count": null, "sample_models": [ "bigcode/santacoder", "rbiojout/santacoder-odoo-15" ], - "relevancy_score": 21.7 - }, - { - "architecture_id": "MosaicGPT", - "total_models": 3, - "total_downloads": 6562, - "min_param_count": null, - "sample_models": [ - "anas-awadalla/mpt-1b-redpajama-200b", - "anas-awadalla/mpt-1b-redpajama-200b-dolly", - "anas-awadalla/mpt-1b-redpajama-200b-hf-style" - ], - "relevancy_score": 21.6 + "relevancy_score": 21.1 }, { - "architecture_id": "OpenMoeForCausalLM", - "total_models": 5, - "total_downloads": 3349, + "architecture_id": "MiniMaxText01ForCausalLM", + "total_models": 1, + "total_downloads": 11872, "min_param_count": null, - "sample_models": [ - "hpcai-tech/openmoe-8B", - "OrionZheng/openmoe-base", - "OrionZheng/openmoe-8b", - "hpcai-tech/openmoe-base", - "OrionZheng/openmoe-8b-chat" + "sample_models": [ + "MiniMaxAI/MiniMax-Text-01" ], - "relevancy_score": 21.5 + "relevancy_score": 21.1 }, { "architecture_id": "GPTRefactForCausalLM", "total_models": 2, - "total_downloads": 8330, + "total_downloads": 8406, "min_param_count": null, "sample_models": [ "refactai/Refact-1_6B-fim", "refactai/Refact-1_6-base" ], - "relevancy_score": 21.5 + "relevancy_score": 21.0 }, { "architecture_id": "CrystalCoderLMHeadModel", "total_models": 2, - "total_downloads": 7989, + "total_downloads": 8126, "min_param_count": null, "sample_models": [ "LLM360/Crystal", "LLM360/CrystalChat" ], - "relevancy_score": 21.4 + "relevancy_score": 20.9 + }, + { + "architecture_id": "OpenMoeForCausalLM", + "total_models": 5, + "total_downloads": 3190, + "min_param_count": null, + "sample_models": [ + "hpcai-tech/openmoe-8B", + "OrionZheng/openmoe-base", + "hpcai-tech/openmoe-base", + "OrionZheng/openmoe-8b", + "OrionZheng/openmoe-8b-chat" + ], + "relevancy_score": 20.8 + }, + { + "architecture_id": "Bagel", + "total_models": 1, + "total_downloads": 1657, + "min_param_count": 14691079811, + "sample_models": [ + "lmms-lab/BAGEL-7B-MoT-ver.LE" + ], + 
"relevancy_score": 20.8 }, { "architecture_id": "MobileLlamaForCausalLM", "total_models": 4, - "total_downloads": 4006, + "total_downloads": 4211, "min_param_count": null, "sample_models": [ "mtgv/MobileVLM_V2-1.7B", + "mtgv/MobileVLM-1.7B", "mtgv/MobileVLM_V2-7B", - "mtgv/MobileVLM_V2-3B", - "mtgv/MobileVLM-1.7B" + "mtgv/MobileVLM_V2-3B" + ], + "relevancy_score": 20.7 + }, + { + "architecture_id": "BlueLMForCausalLM", + "total_models": 3, + "total_downloads": 5311, + "min_param_count": null, + "sample_models": [ + "vivo-ai/BlueLM-7B-Chat", + "vivo-ai/BlueLM-7B-Chat-32K", + "vivo-ai/BlueLM-7B-Base" ], - "relevancy_score": 21.2 + "relevancy_score": 20.6 }, { "architecture_id": "modeling_camelidae.LlamaForCausalLM", "total_models": 3, - "total_downloads": 5505, + "total_downloads": 5073, "min_param_count": null, "sample_models": [ "hywu/Camelidae-8x34B", "hywu/Camelidae-8x7B", "hywu/Camelidae-8x13B" ], - "relevancy_score": 21.2 + "relevancy_score": 20.5 }, { - "architecture_id": "BlueLMForCausalLM", + "architecture_id": "MosaicGPT", "total_models": 3, - "total_downloads": 5360, + "total_downloads": 4781, "min_param_count": null, "sample_models": [ - "vivo-ai/BlueLM-7B-Chat", - "vivo-ai/BlueLM-7B-Base", - "vivo-ai/BlueLM-7B-Chat-32K" + "anas-awadalla/mpt-1b-redpajama-200b", + "anas-awadalla/mpt-1b-redpajama-200b-dolly", + "anas-awadalla/mpt-1b-redpajama-200b-hf-style" ], - "relevancy_score": 21.2 + "relevancy_score": 20.4 }, { - "architecture_id": "Bagel", - "total_models": 1, - "total_downloads": 1511, - "min_param_count": 14691079811, + "architecture_id": "MultiScaleForCausalLM", + "total_models": 3, + "total_downloads": 4272, + "min_param_count": null, "sample_models": [ - "lmms-lab/BAGEL-7B-MoT-ver.LE" + "KoinicLabs/AXL-Translate", + "KoinicLabs/AXL-Vision-v2", + "KoinicLabs/AXL-Chat-10M" ], - "relevancy_score": 21.0 + "relevancy_score": 20.1 }, { - "architecture_id": "GPT2Model", + "architecture_id": "ModernBertDecoderForCausalLM", "total_models": 2, - 
"total_downloads": 5577, + "total_downloads": 5509, "min_param_count": null, "sample_models": [ - "keshan/sinhala-gpt2", - "cerebras/Cerebras-GPT-13B" + "jhu-clsp/ettin-decoder-400m", + "jhu-clsp/ettin-decoder-32m" ], - "relevancy_score": 20.6 + "relevancy_score": 20.1 }, { "architecture_id": "LiquidForCausalLM", "total_models": 2, - "total_downloads": 5508, + "total_downloads": 5676, "min_param_count": null, "sample_models": [ "reaperdoesntknow/DNA-175M", "reaperdoesntknow/DNA-50M" ], - "relevancy_score": 20.6 + "relevancy_score": 20.1 }, { - "architecture_id": "ModernBertDecoderForCausalLM", - "total_models": 2, - "total_downloads": 5245, + "architecture_id": "LlavaLlamaModel", + "total_models": 3, + "total_downloads": 4035, "min_param_count": null, "sample_models": [ - "jhu-clsp/ettin-decoder-400m", - "jhu-clsp/ettin-decoder-32m" + "Efficient-Large-Model/VILA1.5-3b", + "Efficient-Large-Model/NVILA-8B", + "Efficient-Large-Model/VILA1.5-13b" ], - "relevancy_score": 20.5 + "relevancy_score": 20.0 }, { - "architecture_id": "BottleneckT5LMWithPerturb", - "total_models": 4, - "total_downloads": 2754, + "architecture_id": "GPT2Model", + "total_models": 2, + "total_downloads": 5435, "min_param_count": null, "sample_models": [ - "thesephist/contra-bottleneck-t5-small-wikipedia", - "thesephist/contra-bottleneck-t5-base-wikipedia", - "thesephist/contra-bottleneck-t5-large-wikipedia", - "thesephist/contra-bottleneck-t5-xl-wikipedia" + "cerebras/Cerebras-GPT-13B", + "keshan/sinhala-gpt2" ], - "relevancy_score": 20.4 + "relevancy_score": 20.0 }, { - "architecture_id": "MultiScaleForCausalLM", - "total_models": 3, - "total_downloads": 3737, + "architecture_id": "KonkanGPT", + "total_models": 2, + "total_downloads": 5113, "min_param_count": null, "sample_models": [ - "KoinicLabs/AXL-Vision-v2", - "KoinicLabs/AXL-Translate", - "KoinicLabs/AXL-Chat-10M" + "omdeep22/Gonyai-teo2", + "omdeep22/Gonyai-v1" ], - "relevancy_score": 20.4 + "relevancy_score": 19.9 }, { "architecture_id": 
"InternLMXComposer2ForCausalLM", "total_models": 1, - "total_downloads": 6744, + "total_downloads": 6712, "min_param_count": null, "sample_models": [ "internlm/internlm-xcomposer2-7b" ], - "relevancy_score": 20.4 + "relevancy_score": 19.9 }, { - "architecture_id": "KonkanGPT", - "total_models": 2, - "total_downloads": 4822, + "architecture_id": "BottleneckT5LMWithPerturb", + "total_models": 4, + "total_downloads": 2779, "min_param_count": null, "sample_models": [ - "omdeep22/Gonyai-teo2", - "omdeep22/Gonyai-v1" + "thesephist/contra-bottleneck-t5-small-wikipedia", + "thesephist/contra-bottleneck-t5-base-wikipedia", + "thesephist/contra-bottleneck-t5-large-wikipedia", + "thesephist/contra-bottleneck-t5-xl-wikipedia" ], - "relevancy_score": 20.3 + "relevancy_score": 19.8 }, { - "architecture_id": "GraphT5TransformerForConditionalGeneration", - "total_models": 1, - "total_downloads": 6732, + "architecture_id": "NanochatWasmFusedModel", + "total_models": 2, + "total_downloads": 4952, "min_param_count": null, "sample_models": [ - "haitengzhao/gimlet" + "eastlondoner/nanochat-wasm-fused-preview-01", + "eastlondoner/nanochat-wasm-fused-preview-02" ], - "relevancy_score": 20.3 + "relevancy_score": 19.8 }, { "architecture_id": "MobilintExaone4ForCausalLM", "total_models": 1, - "total_downloads": 6493, + "total_downloads": 6518, "min_param_count": null, "sample_models": [ "mobilint/EXAONE-4.0-1.2B" ], - "relevancy_score": 20.3 + "relevancy_score": 19.8 }, { "architecture_id": "LlamaMoEForCausalLM", "total_models": 3, - "total_downloads": 3428, + "total_downloads": 3452, "min_param_count": null, "sample_models": [ "llama-moe/LLaMA-MoE-v1-3_5B-2_8", "llama-moe/LLaMA-MoE-v1-3_0B-2_16", "llama-moe/LLaMA-MoE-v1-3_5B-4_16" ], - "relevancy_score": 20.2 + "relevancy_score": 19.7 }, { "architecture_id": "RobertaForCausalLM", "total_models": 2, - "total_downloads": 4402, + "total_downloads": 4451, "min_param_count": null, "sample_models": [ "uf-aice-lab/math-roberta", 
"gokceuludogan/ChemBERTaLM" ], - "relevancy_score": 20.1 + "relevancy_score": 19.6 }, { "architecture_id": "MossForCausalLM", "total_models": 2, - "total_downloads": 4348, + "total_downloads": 4390, "min_param_count": null, "sample_models": [ "OpenMOSS-Team/moss-moon-003-sft", "OpenMOSS-Team/moss-moon-003-base" ], - "relevancy_score": 20.0 + "relevancy_score": 19.6 }, { - "architecture_id": "BartForCausalLM", - "total_models": 2, - "total_downloads": 4190, + "architecture_id": "Qwen3TSForCausalLM", + "total_models": 1, + "total_downloads": 5950, "min_param_count": null, "sample_models": [ - "sanchit-gandhi/tiny-random-bart-fp16", - "hf-tiny-model-private/tiny-random-BartForCausalLM" + "bytedance-research/ChatTS-8B" ], - "relevancy_score": 20.0 + "relevancy_score": 19.6 }, { "architecture_id": "Int8OPTForCausalLM", "total_models": 2, - "total_downloads": 4147, + "total_downloads": 4242, "min_param_count": null, "sample_models": [ "mit-han-lab/opt-125m-smoothquant", "mit-han-lab/opt-6.7b-smoothquant" ], - "relevancy_score": 19.9 + "relevancy_score": 19.5 }, { - "architecture_id": "InternLMXComposerForCausalLM", - "total_models": 1, - "total_downloads": 5444, + "architecture_id": "BartForCausalLM", + "total_models": 2, + "total_downloads": 4186, "min_param_count": null, "sample_models": [ - "internlm/internlm-xcomposer-7b" + "sanchit-gandhi/tiny-random-bart-fp16", + "hf-tiny-model-private/tiny-random-BartForCausalLM" ], - "relevancy_score": 19.9 + "relevancy_score": 19.5 }, { "architecture_id": "TranceptionLMHeadModel", "total_models": 2, - "total_downloads": 3959, + "total_downloads": 4081, "min_param_count": null, "sample_models": [ "PascalNotin/Tranception_Large", "PascalNotin/Tranception_Small" ], - "relevancy_score": 19.8 + "relevancy_score": 19.4 }, { - "architecture_id": "ModelStarOLMhead", + "architecture_id": "InternLMXComposerForCausalLM", "total_models": 1, - "total_downloads": 5177, - "min_param_count": null, - "sample_models": [ - "Hawa-Al-Akram/StarO-Ai" 
- ], - "relevancy_score": 19.8 - }, - { - "architecture_id": "NanochatWasmFusedModel", - "total_models": 2, - "total_downloads": 3734, + "total_downloads": 5363, "min_param_count": null, "sample_models": [ - "eastlondoner/nanochat-wasm-fused-preview-01", - "eastlondoner/nanochat-wasm-fused-preview-02" + "internlm/internlm-xcomposer-7b" ], - "relevancy_score": 19.7 + "relevancy_score": 19.4 }, { - "architecture_id": "Qwen3TSForCausalLM", + "architecture_id": "ModelStarOLMhead", "total_models": 1, - "total_downloads": 5082, + "total_downloads": 5178, "min_param_count": null, "sample_models": [ - "bytedance-research/ChatTS-8B" + "Hawa-Al-Akram/StarO-Ai" ], - "relevancy_score": 19.7 + "relevancy_score": 19.3 }, { - "architecture_id": "TransfoXLLMHeadModel", - "total_models": 1, - "total_downloads": 4728, + "architecture_id": "Olmo2ForSequenceClassification", + "total_models": 2, + "total_downloads": 3565, "min_param_count": null, "sample_models": [ - "transfo-xl/transfo-xl-wt103" + "allenai/OLMo-2-1124-7B-RM", + "LifeWiki-ai/OLMo-2-1124-7B-RM" ], - "relevancy_score": 19.6 + "relevancy_score": 19.1 }, { - "architecture_id": "Olmo2ForSequenceClassification", - "total_models": 2, - "total_downloads": 3444, + "architecture_id": "GraphT5TransformerForConditionalGeneration", + "total_models": 1, + "total_downloads": 4795, "min_param_count": null, "sample_models": [ - "allenai/OLMo-2-1124-7B-RM", - "LifeWiki-ai/OLMo-2-1124-7B-RM" + "haitengzhao/gimlet" ], - "relevancy_score": 19.5 + "relevancy_score": 19.1 }, { "architecture_id": "EvafrillMoForCausalLM", "total_models": 1, - "total_downloads": 4484, + "total_downloads": 4556, "min_param_count": null, "sample_models": [ "pathcosmos/EVAFRILL-Mo-3B" ], - "relevancy_score": 19.4 + "relevancy_score": 19.0 }, { "architecture_id": "Qwen2TSForCausalLM", "total_models": 1, - "total_downloads": 3992, + "total_downloads": 4042, "min_param_count": null, "sample_models": [ "bytedance-research/ChatTS-14B" ], - "relevancy_score": 19.2 + 
"relevancy_score": 18.8 }, { "architecture_id": "QEDForCausalLM", "total_models": 1, - "total_downloads": 3794, + "total_downloads": 4040, "min_param_count": null, "sample_models": [ "levossadtchi/QED-75M" ], - "relevancy_score": 19.1 + "relevancy_score": 18.8 }, { - "architecture_id": "LongcatCausalLM", + "architecture_id": "MochivaForCausalLM", "total_models": 1, - "total_downloads": 3590, - "min_param_count": 561862880256, + "total_downloads": 3969, + "min_param_count": null, "sample_models": [ - "meituan-longcat/LongCat-Flash-Thinking-2601" + "Mochiva-team/Mochiva-model" ], - "relevancy_score": 18.9 + "relevancy_score": 18.7 }, { - "architecture_id": "YuanForCausalLM", - "total_models": 3, - "total_downloads": 1880, + "architecture_id": "TransfoXLLMHeadModel", + "total_models": 1, + "total_downloads": 3909, "min_param_count": null, "sample_models": [ - "IEITYuan/Yuan2-M32-hf", - "IEITYuan/Yuan2-2B-Mars-hf", - "IEITYuan/Yuan2-2B-Janus-hf" + "transfo-xl/transfo-xl-wt103" ], - "relevancy_score": 18.8 + "relevancy_score": 18.7 }, { - "architecture_id": "GomeForCausalLM", + "architecture_id": "LongcatCausalLM", "total_models": 1, - "total_downloads": 3428, - "min_param_count": null, + "total_downloads": 3688, + "min_param_count": 561862880256, "sample_models": [ - "Prositron/gome" + "meituan-longcat/LongCat-Flash-Thinking-2601" ], - "relevancy_score": 18.8 + "relevancy_score": 18.6 }, { - "architecture_id": "GravityMoEForCausalLM", + "architecture_id": "GomeForCausalLM", "total_models": 1, - "total_downloads": 541, - "min_param_count": 16242181824, + "total_downloads": 3500, + "min_param_count": null, "sample_models": [ - "learning-unit/L1-16B-A3B" + "Prositron/gome" ], - "relevancy_score": 18.7 + "relevancy_score": 18.5 }, { - "architecture_id": "GPT", - "total_models": 2, - "total_downloads": 2262, + "architecture_id": "YuanForCausalLM", + "total_models": 3, + "total_downloads": 1888, "min_param_count": null, "sample_models": [ - 
"LH-Tech-AI/Apex-1.5-Coder-Instruct-350M", - "LH-Tech-AI/Apex-1.5-Instruct-350M" + "IEITYuan/Yuan2-M32-hf", + "IEITYuan/Yuan2-2B-Mars-hf", + "IEITYuan/Yuan2-2B-Janus-hf" ], - "relevancy_score": 18.6 + "relevancy_score": 18.4 }, { "architecture_id": "MyAwesomeModelForCausalLM", "total_models": 1, - "total_downloads": 3025, + "total_downloads": 3098, "min_param_count": null, "sample_models": [ "dongbobo/MyAwesomeModel" ], - "relevancy_score": 18.6 + "relevancy_score": 18.2 }, { - "architecture_id": "CTRLLMHeadModel", + "architecture_id": "QHEARTForECGQA", "total_models": 1, - "total_downloads": 2889, + "total_downloads": 2956, "min_param_count": null, "sample_models": [ - "sshleifer/tiny-ctrl" + "Manhph2211/Q-HEART" ], - "relevancy_score": 18.5 + "relevancy_score": 18.1 }, { - "architecture_id": "CPMAntForCausalLM", + "architecture_id": "CTRLLMHeadModel", "total_models": 1, - "total_downloads": 2814, + "total_downloads": 2941, "min_param_count": null, "sample_models": [ - "openbmb/cpm-ant-10b" + "sshleifer/tiny-ctrl" ], - "relevancy_score": 18.4 + "relevancy_score": 18.1 }, { - "architecture_id": "TAMELM", + "architecture_id": "GPT2CustomLMHeadModel", "total_models": 1, - "total_downloads": 2738, + "total_downloads": 2852, "min_param_count": null, "sample_models": [ - "reaperdoesntknow/TameForCasualLM" + "fxmarty/tiny-testing-gpt2-remote-code" ], - "relevancy_score": 18.3 + "relevancy_score": 18.0 }, { - "architecture_id": "CoherenceMomentumModel", + "architecture_id": "TAMELM", "total_models": 1, - "total_downloads": 2731, + "total_downloads": 2823, "min_param_count": null, "sample_models": [ - "aisingapore/coherence-momentum" + "reaperdoesntknow/TameForCasualLM" ], - "relevancy_score": 18.3 + "relevancy_score": 18.0 }, { - "architecture_id": "GPT2CustomLMHeadModel", + "architecture_id": "CoherenceMomentumModel", "total_models": 1, - "total_downloads": 2691, + "total_downloads": 2756, "min_param_count": null, "sample_models": [ - 
"fxmarty/tiny-testing-gpt2-remote-code" + "aisingapore/coherence-momentum" ], - "relevancy_score": 18.3 + "relevancy_score": 17.9 }, { "architecture_id": "GPT2", "total_models": 1, - "total_downloads": 2643, + "total_downloads": 2709, "min_param_count": null, "sample_models": [ "NamrataThakur/Small_Language_Model_MHA_53M_Pretrained" ], - "relevancy_score": 18.3 + "relevancy_score": 17.9 }, { "architecture_id": "GQAGPT2", "total_models": 1, - "total_downloads": 2637, + "total_downloads": 2699, "min_param_count": null, "sample_models": [ "NamrataThakur/Small_Language_Model_GQA_48M_Pretrained" ], - "relevancy_score": 18.3 + "relevancy_score": 17.9 }, { - "architecture_id": "MoEGPT2", + "architecture_id": "ThinkerLM", "total_models": 1, - "total_downloads": 2636, + "total_downloads": 2697, "min_param_count": null, "sample_models": [ - "NamrataThakur/Small_Language_Model_MOE_127M_Pretrained" + "prskid1000/micro-Omni" ], - "relevancy_score": 18.3 + "relevancy_score": 17.9 }, { - "architecture_id": "ThinkerLM", + "architecture_id": "CPMAntForCausalLM", "total_models": 1, - "total_downloads": 2627, + "total_downloads": 2693, "min_param_count": null, "sample_models": [ - "prskid1000/micro-Omni" + "openbmb/cpm-ant-10b" ], - "relevancy_score": 18.2 + "relevancy_score": 17.9 }, { - "architecture_id": "QHEARTForECGQA", + "architecture_id": "D3PMSanskritModel", "total_models": 1, - "total_downloads": 2624, + "total_downloads": 2676, "min_param_count": null, "sample_models": [ - "Manhph2211/Q-HEART" + "bhsinghgrid/sanskrit-translation" ], - "relevancy_score": 18.2 + "relevancy_score": 17.9 }, { - "architecture_id": "SeerAttnLlamaForCausalLM", + "architecture_id": "GuppyLM", "total_models": 1, - "total_downloads": 2618, + "total_downloads": 2655, "min_param_count": null, "sample_models": [ - "SeerAttention/SeerAttention-Llama-3.1-8B-AttnGates" + "arman-bd/guppylm-9M" ], - "relevancy_score": 18.2 + "relevancy_score": 17.9 }, { - "architecture_id": "D3PMSanskritModel", + 
"architecture_id": "MoEGPT2", "total_models": 1, - "total_downloads": 2603, + "total_downloads": 2552, "min_param_count": null, "sample_models": [ - "bhsinghgrid/sanskrit-translation" + "NamrataThakur/Small_Language_Model_MOE_127M_Pretrained" ], - "relevancy_score": 18.2 + "relevancy_score": 17.8 }, { - "architecture_id": "MoYiForCausalLM", + "architecture_id": "JiRackTernary1B", "total_models": 1, - "total_downloads": 2433, + "total_downloads": 2529, "min_param_count": null, "sample_models": [ - "astanahub/alemllm" + "kgrabko/JiRackTernary_1b" ], - "relevancy_score": 18.1 + "relevancy_score": 17.8 }, { - "architecture_id": "Eagle3DeepseekV2ForCausalLM", + "architecture_id": "Speech2TextTransformerForConditionalGeneration", "total_models": 1, - "total_downloads": 2367, + "total_downloads": 2373, "min_param_count": null, "sample_models": [ - "nvidia/Kimi-K2.5-Thinking-Eagle3" + "valhalla/s2t_mustc_multilinguial_medium" ], - "relevancy_score": 18.0 + "relevancy_score": 17.6 }, { - "architecture_id": "Speech2TextTransformerForConditionalGeneration", + "architecture_id": "Eagle3DeepseekV2ForCausalLM", "total_models": 1, - "total_downloads": 2304, + "total_downloads": 2367, "min_param_count": null, "sample_models": [ - "valhalla/s2t_mustc_multilinguial_medium" + "nvidia/Kimi-K2.5-Thinking-Eagle3" ], - "relevancy_score": 18.0 + "relevancy_score": 17.6 }, { - "architecture_id": "Videollama2Qwen2ForCausalLM", + "architecture_id": "GPTXForCausalLM", "total_models": 1, - "total_downloads": 2257, + "total_downloads": 2295, "min_param_count": null, "sample_models": [ - "QuangTuan/MultiMood-7B-GRPO-VisualAudioText-Comp" + "AxiomicLabs/GPT-X-125M" ], - "relevancy_score": 17.9 + "relevancy_score": 17.5 }, { "architecture_id": "WhisperMixStyleForConditionalGeneration", "total_models": 1, - "total_downloads": 2206, + "total_downloads": 2258, "min_param_count": null, "sample_models": [ "wago5090/mixstyle_multi-s" ], - "relevancy_score": 17.9 + "relevancy_score": 17.5 }, { - 
"architecture_id": "AlinlightForCausalLM", + "architecture_id": "Videollama2Qwen2ForCausalLM", "total_models": 1, - "total_downloads": 2193, + "total_downloads": 2223, "min_param_count": null, "sample_models": [ - "EngineerGL/Alinlight" + "QuangTuan/MultiMood-7B-GRPO-VisualAudioText-Comp" ], - "relevancy_score": 17.8 + "relevancy_score": 17.5 }, { - "architecture_id": "GuppyLM", + "architecture_id": "GPT", + "total_models": 2, + "total_downloads": 1600, + "min_param_count": null, + "sample_models": [ + "LH-Tech-AI/Apex-1.5-Coder-Instruct-350M", + "LH-Tech-AI/Apex-1.5-Instruct-350M" + ], + "relevancy_score": 17.4 + }, + { + "architecture_id": "SeerAttnLlamaForCausalLM", "total_models": 1, - "total_downloads": 2179, + "total_downloads": 2171, "min_param_count": null, "sample_models": [ - "arman-bd/guppylm-9M" + "SeerAttention/SeerAttention-Llama-3.1-8B-AttnGates" ], - "relevancy_score": 17.8 + "relevancy_score": 17.4 }, { - "architecture_id": "LlamaForCausalLMEagle", + "architecture_id": "Typhoon2Audio2AudioForConditionalGeneration", "total_models": 1, - "total_downloads": 2127, + "total_downloads": 2169, "min_param_count": null, "sample_models": [ - "thunlp/LLaMA3-Instruct-8B-FR-Spec" + "typhoon-ai/llama3.1-typhoon2-audio-8b-instruct" ], - "relevancy_score": 17.8 + "relevancy_score": 17.4 }, { - "architecture_id": "JiRackTernary1B", + "architecture_id": "LlamaForCausalLMEagle", "total_models": 1, - "total_downloads": 2121, + "total_downloads": 2167, "min_param_count": null, "sample_models": [ - "kgrabko/JiRackTernary_1b" + "thunlp/LLaMA3-Instruct-8B-FR-Spec" ], - "relevancy_score": 17.8 + "relevancy_score": 17.4 }, { - "architecture_id": "RuGPT3XLForCausalLM", + "architecture_id": "DenseLLM", "total_models": 1, - "total_downloads": 2110, + "total_downloads": 2167, "min_param_count": null, "sample_models": [ - "evilfreelancer/ruGPT3XL" + "AlgoDriveAI/Sanskrit_Akkadian_LLM_v1.0" ], - "relevancy_score": 17.8 + "relevancy_score": 17.4 }, { - "architecture_id": 
"Typhoon2Audio2AudioForConditionalGeneration", + "architecture_id": "AlinlightForCausalLM", "total_models": 1, - "total_downloads": 2107, + "total_downloads": 2158, "min_param_count": null, "sample_models": [ - "typhoon-ai/llama3.1-typhoon2-audio-8b-instruct" + "EngineerGL/Alinlight" ], - "relevancy_score": 17.8 + "relevancy_score": 17.4 }, { - "architecture_id": "PointLLMLlamaForCausalLM", - "total_models": 2, - "total_downloads": 1493, + "architecture_id": "TeleFLMForCausalLM", + "total_models": 1, + "total_downloads": 2150, "min_param_count": null, "sample_models": [ - "RunsenXu/PointLLM_7B_v1.1_init", - "RunsenXu/PointLLM_7B_v1.2" + "CofeAI/Tele-FLM-1T" ], - "relevancy_score": 17.7 + "relevancy_score": 17.4 }, { - "architecture_id": "LlaMAForCausalLM", + "architecture_id": "TFGPT2LMHeadModel", "total_models": 1, - "total_downloads": 2097, + "total_downloads": 2140, "min_param_count": null, "sample_models": [ - "circulus/alpaca-7b" + "mymusise/gpt2-medium-chinese" ], - "relevancy_score": 17.7 + "relevancy_score": 17.4 }, { - "architecture_id": "TeleFLMForCausalLM", + "architecture_id": "LlaMAForCausalLM", "total_models": 1, - "total_downloads": 2093, + "total_downloads": 2121, "min_param_count": null, "sample_models": [ - "CofeAI/Tele-FLM-1T" + "circulus/alpaca-7b" ], - "relevancy_score": 17.7 + "relevancy_score": 17.4 }, { "architecture_id": "GeoVForCausalLM", "total_models": 1, - "total_downloads": 2090, + "total_downloads": 2118, "min_param_count": null, "sample_models": [ "GeoV/GeoV-9b" ], - "relevancy_score": 17.7 + "relevancy_score": 17.4 }, { - "architecture_id": "TFGPT2LMHeadModel", + "architecture_id": "GPTModelForTextGeneration", "total_models": 1, - "total_downloads": 2076, + "total_downloads": 2107, "min_param_count": null, "sample_models": [ - "mymusise/gpt2-medium-chinese" + "samkeet/GPT_124M-Instruct" ], - "relevancy_score": 17.7 + "relevancy_score": 17.4 }, { - "architecture_id": "RobertaPreLayerNormForCausalLM", - "total_models": 1, - 
"total_downloads": 2074, + "architecture_id": "IndexForCausalLM", + "total_models": 2, + "total_downloads": 1559, "min_param_count": null, "sample_models": [ - "hf-tiny-model-private/tiny-random-RobertaPreLayerNormForCausalLM" + "IndexTeam/Index-1.9B-Chat", + "IndexTeam/Index-1.9B-Pure" ], - "relevancy_score": 17.7 + "relevancy_score": 17.3 }, { "architecture_id": "ElectraForCausalLM", "total_models": 1, - "total_downloads": 2071, + "total_downloads": 2103, "min_param_count": null, "sample_models": [ "smeoni/nbme-electra-large-generator" ], - "relevancy_score": 17.7 + "relevancy_score": 17.3 }, { - "architecture_id": "GPTModelForTextGeneration", + "architecture_id": "PegasusForCausalLM", "total_models": 1, - "total_downloads": 2059, + "total_downloads": 2056, "min_param_count": null, "sample_models": [ - "samkeet/GPT_124M-Instruct" + "hf-tiny-model-private/tiny-random-PegasusForCausalLM" ], - "relevancy_score": 17.7 + "relevancy_score": 17.3 }, { - "architecture_id": "PegasusForCausalLM", + "architecture_id": "RobertaPreLayerNormForCausalLM", "total_models": 1, - "total_downloads": 2032, + "total_downloads": 2047, "min_param_count": null, "sample_models": [ - "hf-tiny-model-private/tiny-random-PegasusForCausalLM" + "hf-tiny-model-private/tiny-random-RobertaPreLayerNormForCausalLM" ], - "relevancy_score": 17.7 + "relevancy_score": 17.3 }, { "architecture_id": "BlenderbotForCausalLM", "total_models": 1, - "total_downloads": 2026, + "total_downloads": 2046, "min_param_count": null, "sample_models": [ "hf-tiny-model-private/tiny-random-BlenderbotForCausalLM" ], - "relevancy_score": 17.7 + "relevancy_score": 17.3 }, { - "architecture_id": "DenseLLM", + "architecture_id": "XModelForCausalLM", "total_models": 1, - "total_downloads": 2011, + "total_downloads": 2037, "min_param_count": null, "sample_models": [ - "AlgoDriveAI/Sanskrit_Akkadian_LLM_v1.0" + "XiaoduoAILab/Xmodel_LM" ], - "relevancy_score": 17.7 + "relevancy_score": 17.3 }, { - "architecture_id": 
"OtterForConditionalGeneration", - "total_models": 2, - "total_downloads": 1460, + "architecture_id": "EnergyTransformer", + "total_models": 1, + "total_downloads": 2031, "min_param_count": null, "sample_models": [ - "luodian/OTTER-Video-LLaMA7B-DenseCaption", - "luodian/OTTER-MPT1B-RPJama-Init" + "cccczshao/CALM-M" ], - "relevancy_score": 17.6 + "relevancy_score": 17.3 }, { - "architecture_id": "MonkeyLMHeadModel", - "total_models": 2, - "total_downloads": 1486, + "architecture_id": "MvpForCausalLM", + "total_models": 1, + "total_downloads": 2018, "min_param_count": null, "sample_models": [ - "echo840/Monkey-Chat", - "echo840/Monkey" + "hf-tiny-model-private/tiny-random-MvpForCausalLM" ], - "relevancy_score": 17.6 + "relevancy_score": 17.3 }, { - "architecture_id": "IndexForCausalLM", + "architecture_id": "OtterForConditionalGeneration", "total_models": 2, - "total_downloads": 1467, + "total_downloads": 1473, "min_param_count": null, "sample_models": [ - "IndexTeam/Index-1.9B-Chat", - "IndexTeam/Index-1.9B-Pure" + "luodian/OTTER-Video-LLaMA7B-DenseCaption", + "luodian/OTTER-MPT1B-RPJama-Init" ], - "relevancy_score": 17.6 + "relevancy_score": 17.2 }, { - "architecture_id": "EnergyTransformer", - "total_models": 1, - "total_downloads": 1991, + "architecture_id": "MonkeyLMHeadModel", + "total_models": 2, + "total_downloads": 1496, "min_param_count": null, "sample_models": [ - "cccczshao/CALM-M" + "echo840/Monkey-Chat", + "echo840/Monkey" ], - "relevancy_score": 17.6 + "relevancy_score": 17.2 }, { - "architecture_id": "MvpForCausalLM", - "total_models": 1, - "total_downloads": 1990, + "architecture_id": "PointLLMLlamaForCausalLM", + "total_models": 2, + "total_downloads": 1497, "min_param_count": null, "sample_models": [ - "hf-tiny-model-private/tiny-random-MvpForCausalLM" + "RunsenXu/PointLLM_7B_v1.1_init", + "RunsenXu/PointLLM_7B_v1.2" ], - "relevancy_score": 17.6 + "relevancy_score": 17.2 }, { - "architecture_id": "XModelForCausalLM", + "architecture_id": 
"ConditionalGPT2LMHeadModel", "total_models": 1, - "total_downloads": 1987, + "total_downloads": 1991, "min_param_count": null, "sample_models": [ - "XiaoduoAILab/Xmodel_LM" + "entropy/roberta_zinc_decoder" ], - "relevancy_score": 17.6 + "relevancy_score": 17.2 }, { - "architecture_id": "ConditionalGPT2LMHeadModel", + "architecture_id": "Qwen35ForCausalLM", "total_models": 1, - "total_downloads": 1951, + "total_downloads": 1971, "min_param_count": null, "sample_models": [ - "entropy/roberta_zinc_decoder" + "JeffGreen311/Eve-V2-Unleashed-Qwen3.5-8B-Liberated-4K-4B-Merged" ], - "relevancy_score": 17.6 + "relevancy_score": 17.2 }, { "architecture_id": "DebertaV2ForCausalLM", "total_models": 1, - "total_downloads": 1928, + "total_downloads": 1960, "min_param_count": null, "sample_models": [ "ltg/deberta-xxlarge-fixed" ], - "relevancy_score": 17.6 + "relevancy_score": 17.2 }, { - "architecture_id": "BTLMLMHeadModel", - "total_models": 2, - "total_downloads": 1403, + "architecture_id": "SpectusForConditionalGeneration", + "total_models": 1, + "total_downloads": 1945, "min_param_count": null, "sample_models": [ - "cerebras/btlm-3b-8k-base", - "EleutherAI/Hermes-btlm-3b-8k" + "MS-ML/SpecTUS_pretrained_only" ], - "relevancy_score": 17.5 + "relevancy_score": 17.2 }, { "architecture_id": "TelechatForCausalLM", "total_models": 2, - "total_downloads": 1363, + "total_downloads": 1426, "min_param_count": null, "sample_models": [ "Tele-AI/telechat-7B", "Tele-AI/TeleChat-12B" ], - "relevancy_score": 17.5 - }, - { - "architecture_id": "SpectusForConditionalGeneration", - "total_models": 1, - "total_downloads": 1908, - "min_param_count": null, - "sample_models": [ - "MS-ML/SpecTUS_pretrained_only" - ], - "relevancy_score": 17.5 + "relevancy_score": 17.1 }, { - "architecture_id": "GPTXForCausalLM", - "total_models": 1, - "total_downloads": 1871, + "architecture_id": "BTLMLMHeadModel", + "total_models": 2, + "total_downloads": 1437, "min_param_count": null, "sample_models": [ - 
"AxiomicLabs/GPT-X-125m-15bt" + "cerebras/btlm-3b-8k-base", + "EleutherAI/Hermes-btlm-3b-8k" ], - "relevancy_score": 17.5 + "relevancy_score": 17.1 }, { "architecture_id": "LSGBartForConditionalGeneration", "total_models": 1, - "total_downloads": 1843, + "total_downloads": 1871, "min_param_count": null, "sample_models": [ "morenolq/LEGIT-BART-LSG-4096" ], - "relevancy_score": 17.5 + "relevancy_score": 17.1 }, { "architecture_id": "CloverLMForCausalLM", "total_models": 1, - "total_downloads": 1822, + "total_downloads": 1833, "min_param_count": null, "sample_models": [ "daslab-testing/CloverLM" ], - "relevancy_score": 17.4 - }, - { - "architecture_id": "MiniMaxText01ForCausalLM", - "total_models": 1, - "total_downloads": 1682, - "min_param_count": null, - "sample_models": [ - "MiniMaxAI/MiniMax-Text-01" - ], - "relevancy_score": 17.3 + "relevancy_score": 17.0 }, { "architecture_id": "LlavaCrystalForCausalLM", "total_models": 1, - "total_downloads": 1586, + "total_downloads": 1614, "min_param_count": null, "sample_models": [ "LLM360/CrystalChat-7B-Web2Code" ], - "relevancy_score": 17.1 + "relevancy_score": 16.8 }, { - "architecture_id": "MobileLLMForCausalLM", + "architecture_id": "InternLM2ForRewardModel", "total_models": 1, - "total_downloads": 1585, + "total_downloads": 1562, "min_param_count": null, "sample_models": [ - "facebook/MobileLLM-125M" + "internlm/internlm2_5-step-prover-critic" ], - "relevancy_score": 17.1 + "relevancy_score": 16.7 }, { "architecture_id": "MobilintEagle3Qwen2ForCausalLM", "total_models": 1, - "total_downloads": 1541, + "total_downloads": 1543, "min_param_count": null, "sample_models": [ "mobilint/EAGLE3-JPharmatron-7B" ], - "relevancy_score": 17.1 + "relevancy_score": 16.7 }, { - "architecture_id": "InternLM2ForRewardModel", - "total_models": 1, - "total_downloads": 1527, + "architecture_id": "MPTForCausalLM", + "total_models": 2, + "total_downloads": 1115, "min_param_count": null, "sample_models": [ - 
"internlm/internlm2_5-step-prover-critic" + "hyungtae/mpt-30b", + "manojpreveen/mpt-30b-v5" ], - "relevancy_score": 17.0 + "relevancy_score": 16.6 }, { - "architecture_id": "Qwen35ForCausalLM", + "architecture_id": "MobileLLMForCausalLM", "total_models": 1, - "total_downloads": 1512, + "total_downloads": 1522, "min_param_count": null, "sample_models": [ - "JeffGreen311/Eve-V2-Unleashed-Qwen3.5-8B-Liberated-4K-4B-Merged" + "facebook/MobileLLM-125M" ], - "relevancy_score": 17.0 + "relevancy_score": 16.6 }, { "architecture_id": "GeoChatLlamaForCausalLM", "total_models": 1, - "total_downloads": 1454, + "total_downloads": 1416, "min_param_count": null, "sample_models": [ "MBZUAI/geochat-7B" ], - "relevancy_score": 16.9 + "relevancy_score": 16.5 }, { - "architecture_id": "MochivaForCausalLM", + "architecture_id": "Qwen3VLMoeForConditionalGeneration", "total_models": 1, - "total_downloads": 1438, - "min_param_count": null, + "total_downloads": 1385, + "min_param_count": 31070754032, "sample_models": [ - "Mochiva-team/Mochiva-model" + "Oysiyl/qwen3-vl-30b-a3b-unslop-good-lora-v1" ], - "relevancy_score": 16.9 + "relevancy_score": 16.4 }, { "architecture_id": "HeliumForCausalLM", "total_models": 1, - "total_downloads": 1363, + "total_downloads": 1309, "min_param_count": null, "sample_models": [ "kyutai/helium-1-preview-2b" ], - "relevancy_score": 16.8 + "relevancy_score": 16.3 }, { "architecture_id": "JiRackTernaryModel", "total_models": 1, - "total_downloads": 1289, + "total_downloads": 1292, "min_param_count": null, "sample_models": [ "kgrabko/JiRackTernary_70b" ], - "relevancy_score": 16.7 + "relevancy_score": 16.3 + }, + { + "architecture_id": "Papagan", + "total_models": 1, + "total_downloads": 1216, + "min_param_count": null, + "sample_models": [ + "SutskeverFanBoy/papagan_1.3b" + ], + "relevancy_score": 16.2 }, { "architecture_id": "PolyLMHeadModel", "total_models": 1, - "total_downloads": 1195, + "total_downloads": 1129, "min_param_count": null, "sample_models": [ 
"DAMO-NLP-MT/polylm-13b" ], - "relevancy_score": 16.5 + "relevancy_score": 16.0 }, { "architecture_id": "CambrianLlamaForCausalLM", "total_models": 1, - "total_downloads": 1073, + "total_downloads": 1124, "min_param_count": null, "sample_models": [ "nyu-visionx/cambrian-8b" ], - "relevancy_score": 16.3 + "relevancy_score": 16.0 }, { - "architecture_id": "LlamaModel", + "architecture_id": "ErnieForCausalLM", "total_models": 1, - "total_downloads": 1034, - "min_param_count": 33930165248, + "total_downloads": 1018, + "min_param_count": null, "sample_models": [ - "ngoan/NgoanYi" + "mohitsha/tiny-ernie-random-remote-code" ], - "relevancy_score": 16.2 + "relevancy_score": 15.8 }, { - "architecture_id": "TransnormerForCausalLM", + "architecture_id": "Qwen3_5MoeForCausalLM", "total_models": 1, - "total_downloads": 1030, - "min_param_count": null, + "total_downloads": 1000, + "min_param_count": 122111526912, "sample_models": [ - "OpenNLPLab/TransNormerLLM-385M" + "wangzhang/Qwen3.5-122B-A10B-abliterix" ], - "relevancy_score": 16.2 + "relevancy_score": 15.7 }, { - "architecture_id": "Qwen3VLMoeForConditionalGeneration", + "architecture_id": "XMistralForCausalLM", "total_models": 1, - "total_downloads": 997, - "min_param_count": 31070754032, + "total_downloads": 984, + "min_param_count": null, "sample_models": [ - "Oysiyl/qwen3-vl-30b-a3b-unslop-good-lora-v1" + "Hannibal046/xrag-7b" ], - "relevancy_score": 16.1 + "relevancy_score": 15.7 }, { - "architecture_id": "KimiK25ForConditionalGeneration", + "architecture_id": "TransnormerForCausalLM", "total_models": 1, - "total_downloads": 988, - "min_param_count": 91383180528, + "total_downloads": 973, + "min_param_count": null, "sample_models": [ - "Ex0bit/Kimi-K2.5-PRISM-REAP-530B-A32B" + "OpenNLPLab/TransNormerLLM-385M" ], - "relevancy_score": 16.1 + "relevancy_score": 15.7 }, { - "architecture_id": "ErnieForCausalLM", + "architecture_id": "YiForCausalLM", "total_models": 1, - "total_downloads": 982, + "total_downloads": 955, 
"min_param_count": null, "sample_models": [ - "mohitsha/tiny-ernie-random-remote-code" + "llmware/dragon-yi-6b-v0" ], - "relevancy_score": 16.1 + "relevancy_score": 15.6 }, { - "architecture_id": "ShikraLlamaForCausalLM", + "architecture_id": "SOVYN85M", "total_models": 1, - "total_downloads": 950, + "total_downloads": 949, "min_param_count": null, "sample_models": [ - "shikras/shikra-7b-delta-v1" + "SOVYN/SOVYN-85M" ], - "relevancy_score": 16.0 + "relevancy_score": 15.6 }, { - "architecture_id": "YiForCausalLM", + "architecture_id": "LlamaModel", + "total_models": 1, + "total_downloads": 948, + "min_param_count": 33930165248, + "sample_models": [ + "ngoan/NgoanYi" + ], + "relevancy_score": 15.6 + }, + { + "architecture_id": "ShikraLlamaForCausalLM", "total_models": 1, - "total_downloads": 939, + "total_downloads": 928, "min_param_count": null, "sample_models": [ - "llmware/dragon-yi-6b-v0" + "shikras/shikra-7b-delta-v1" ], - "relevancy_score": 16.0 + "relevancy_score": 15.6 }, { "architecture_id": "CpmBeeForCausalLM", "total_models": 1, - "total_downloads": 895, + "total_downloads": 911, "min_param_count": null, "sample_models": [ "openbmb/cpm-bee-10b" ], - "relevancy_score": 15.8 + "relevancy_score": 15.5 }, { "architecture_id": "ZsGPT2LMHeadModel", "total_models": 1, - "total_downloads": 882, + "total_downloads": 902, "min_param_count": null, "sample_models": [ "claritylab/zero-shot-vanilla-gpt2" ], - "relevancy_score": 15.8 + "relevancy_score": 15.5 }, { "architecture_id": "HumanGPTForCausalLM", "total_models": 1, - "total_downloads": 868, + "total_downloads": 876, "min_param_count": null, "sample_models": [ "YaoFeng/CHATPOSE-V0" ], - "relevancy_score": 15.8 + "relevancy_score": 15.4 }, { "architecture_id": "Phi4FlashForCausalLM", "total_models": 1, - "total_downloads": 809, + "total_downloads": 839, "min_param_count": null, "sample_models": [ "microsoft/Phi-4-mini-flash-reasoning" ], - "relevancy_score": 15.6 + "relevancy_score": 15.3 }, { - "architecture_id": 
"XMistralForCausalLM", + "architecture_id": "KimiK25ForConditionalGeneration", "total_models": 1, - "total_downloads": 796, - "min_param_count": null, + "total_downloads": 824, + "min_param_count": 91383180528, "sample_models": [ - "Hannibal046/xrag-7b" + "Ex0bit/Kimi-K2.5-PRISM-REAP-530B-A32B" ], - "relevancy_score": 15.6 + "relevancy_score": 15.3 }, { "architecture_id": "FlamingoForCausalLM", "total_models": 1, - "total_downloads": 791, + "total_downloads": 820, "min_param_count": null, "sample_models": [ "babylm/flamingo-2024" ], - "relevancy_score": 15.6 + "relevancy_score": 15.3 }, { - "architecture_id": "VStreamLlamaForCausalLM", + "architecture_id": "AquilaDenseForCausalLM", "total_models": 1, - "total_downloads": 762, + "total_downloads": 820, "min_param_count": null, "sample_models": [ - "IVGSZ/Flash-VStream-7b" + "BAAI/AquilaDense-7B" ], - "relevancy_score": 15.5 + "relevancy_score": 15.3 }, { - "architecture_id": "AquilaDenseForCausalLM", + "architecture_id": "EmuForCausalLM", "total_models": 1, - "total_downloads": 759, + "total_downloads": 795, "min_param_count": null, "sample_models": [ - "BAAI/AquilaDense-7B" + "BAAI/Emu2-Chat" ], - "relevancy_score": 15.5 + "relevancy_score": 15.2 }, { - "architecture_id": "EmuForCausalLM", + "architecture_id": "VStreamLlamaForCausalLM", "total_models": 1, - "total_downloads": 747, + "total_downloads": 780, "min_param_count": null, "sample_models": [ - "BAAI/Emu2-Chat" + "IVGSZ/Flash-VStream-7b" ], - "relevancy_score": 15.4 + "relevancy_score": 15.2 }, { "architecture_id": "MoELLaVAQWenForCausalLM", "total_models": 1, - "total_downloads": 728, + "total_downloads": 729, "min_param_count": null, "sample_models": [ "LanguageBind/MoE-LLaVA-Qwen-1.8B-4e" ], - "relevancy_score": 15.4 + "relevancy_score": 15.0 }, { "architecture_id": "YayiForCausalLM", "total_models": 1, - "total_downloads": 713, + "total_downloads": 724, "min_param_count": null, "sample_models": [ "wenge-research/yayi2-30b" ], - "relevancy_score": 15.3 + 
"relevancy_score": 15.0 + }, + { + "architecture_id": "STLlamaForCausalLM", + "total_models": 1, + "total_downloads": 723, + "min_param_count": null, + "sample_models": [ + "bjdwh/UrbanGPT" + ], + "relevancy_score": 15.0 }, { "architecture_id": "SkyworkForCausalLM", @@ -4440,57 +4563,67 @@ "sample_models": [ "Skywork/Skywork-13B-base" ], - "relevancy_score": 15.3 + "relevancy_score": 14.9 }, { "architecture_id": "MobiLlamaForCausalLM", "total_models": 1, - "total_downloads": 661, + "total_downloads": 667, "min_param_count": null, "sample_models": [ "MBZUAI/MobiLlama-05B" ], - "relevancy_score": 15.2 + "relevancy_score": 14.8 }, { - "architecture_id": "HebrewGPTForCausalLM", + "architecture_id": "JapaneseStableLMAlphaForCausalLM", "total_models": 1, - "total_downloads": 643, + "total_downloads": 656, "min_param_count": null, "sample_models": [ - "Slasky/HebrewGPT-1B" + "stabilityai/japanese-stablelm-base-alpha-7b" ], - "relevancy_score": 15.1 + "relevancy_score": 14.8 }, { "architecture_id": "GPTBigCodeLMHeadModel", "total_models": 1, - "total_downloads": 638, + "total_downloads": 654, "min_param_count": null, "sample_models": [ "bigcode/santacoderpack" ], - "relevancy_score": 15.1 + "relevancy_score": 14.8 + }, + { + "architecture_id": "SDARMoeForCausalLM", + "total_models": 1, + "total_downloads": 653, + "min_param_count": 30532122624, + "sample_models": [ + "JetLM/SDAR-30B-A3B-Chat-b32" + ], + "relevancy_score": 14.8 }, { "architecture_id": "GPTJiangForCausalLM", "total_models": 1, - "total_downloads": 627, + "total_downloads": 650, "min_param_count": null, "sample_models": [ "kdf/jiang-base" ], - "relevancy_score": 15.1 + "relevancy_score": 14.8 }, { - "architecture_id": "JapaneseStableLMAlphaForCausalLM", + "architecture_id": "HebrewGPTForCausalLM", "total_models": 1, - "total_downloads": 627, + "total_downloads": 646, "min_param_count": null, "sample_models": [ - "stabilityai/japanese-stablelm-base-alpha-7b" + "Slasky/HebrewGPT-1B" ], - "relevancy_score": 15.1 
+ "relevancy_score": 14.8 }, { "architecture_id": "BunnyQwenForCausalLM", @@ -4500,167 +4633,167 @@ "sample_models": [ "dphn/dolphin-vision-72b" ], - "relevancy_score": 15.0 + "relevancy_score": 14.7 }, { - "architecture_id": "SDARMoeForCausalLM", + "architecture_id": "GrokForCausalLM", "total_models": 1, "total_downloads": 619, - "min_param_count": 30532122624, + "min_param_count": null, "sample_models": [ - "JetLM/SDAR-30B-A3B-Chat-b32" + "keyfan/grok-1-hf" ], - "relevancy_score": 15.0 + "relevancy_score": 14.7 }, { - "architecture_id": "STLlamaForCausalLM", + "architecture_id": "LongcatFlashNgramForCausalLM", "total_models": 1, - "total_downloads": 617, + "total_downloads": 615, "min_param_count": null, "sample_models": [ - "bjdwh/UrbanGPT" + "meituan-longcat/LongCat-Flash-Lite" ], - "relevancy_score": 15.0 + "relevancy_score": 14.7 }, { - "architecture_id": "GrokForCausalLM", + "architecture_id": "LingoWhaleForCausalLM", "total_models": 1, - "total_downloads": 606, + "total_downloads": 595, "min_param_count": null, "sample_models": [ - "keyfan/grok-1-hf" + "deeplang-ai/LingoWhale-8B" ], - "relevancy_score": 15.0 + "relevancy_score": 14.6 }, { "architecture_id": "Llama2ForCausalLM", "total_models": 1, - "total_downloads": 590, + "total_downloads": 592, "min_param_count": null, "sample_models": [ "llmware/dragon-llama-7b-v0" ], - "relevancy_score": 14.9 + "relevancy_score": 14.6 }, { "architecture_id": "MPLUGOwl2LlamaForCausalLM", "total_models": 1, - "total_downloads": 589, + "total_downloads": 592, "min_param_count": null, "sample_models": [ "q-future/q-align-quality" ], - "relevancy_score": 14.9 + "relevancy_score": 14.6 }, { "architecture_id": "GLaMMForCausalLM", "total_models": 1, - "total_downloads": 587, + "total_downloads": 585, "min_param_count": null, "sample_models": [ "MBZUAI/GLaMM-FullScope" ], - "relevancy_score": 14.9 + "relevancy_score": 14.6 }, { - "architecture_id": "LingoWhaleForCausalLM", + "architecture_id": "OLMoModelForCausalLM", 
"total_models": 1, - "total_downloads": 583, + "total_downloads": 585, "min_param_count": null, "sample_models": [ - "deeplang-ai/LingoWhale-8B" + "NousResearch/OLMo-Bitnet-1B" ], - "relevancy_score": 14.9 + "relevancy_score": 14.6 }, { "architecture_id": "OpenBAForConditionalGeneration", "total_models": 1, - "total_downloads": 581, + "total_downloads": 576, "min_param_count": null, "sample_models": [ "OpenNLG/OpenBA-V1-Based" ], - "relevancy_score": 14.9 + "relevancy_score": 14.5 }, { - "architecture_id": "OLMoModelForCausalLM", + "architecture_id": "GPTJXForCausalLM", "total_models": 1, - "total_downloads": 573, + "total_downloads": 574, "min_param_count": null, "sample_models": [ - "NousResearch/OLMo-Bitnet-1B" + "KnutJaegersberg/GPT-JX-3b" ], - "relevancy_score": 14.9 + "relevancy_score": 14.5 }, { - "architecture_id": "GPTJXForCausalLM", + "architecture_id": "LlavaStableLMEpochForCausalLM", "total_models": 1, - "total_downloads": 566, + "total_downloads": 540, "min_param_count": null, "sample_models": [ - "KnutJaegersberg/GPT-JX-3b" + "NousResearch/Obsidian-3B-V0.5" ], - "relevancy_score": 14.8 + "relevancy_score": 14.4 }, { - "architecture_id": "Qwen3_5MoeForCausalLM", + "architecture_id": "AprielHForCausalLM", "total_models": 1, - "total_downloads": 562, - "min_param_count": 122111526912, + "total_downloads": 538, + "min_param_count": null, "sample_models": [ - "wangzhang/Qwen3.5-122B-A10B-abliterix" + "ServiceNow-AI/Apriel-H1-15b-Thinker-SFT" ], - "relevancy_score": 14.8 + "relevancy_score": 14.4 }, { - "architecture_id": "LlavaStableLMEpochForCausalLM", + "architecture_id": "CacaForCausalLM", "total_models": 1, - "total_downloads": 547, + "total_downloads": 530, "min_param_count": null, "sample_models": [ - "NousResearch/Obsidian-3B-V0.5" + "Lyon28/caca-1B-untrained" ], - "relevancy_score": 14.7 + "relevancy_score": 14.3 }, { - "architecture_id": "VSMForCausalLM", + "architecture_id": "M2M100ForConditionalGeneration", "total_models": 1, - 
"total_downloads": 544, + "total_downloads": 529, "min_param_count": null, "sample_models": [ - "craigwu/seal_vsm_7b" + "dsfsi/nso-en-m2m100-gov" ], - "relevancy_score": 14.7 + "relevancy_score": 14.3 }, { - "architecture_id": "LlavaSearchLlamaForCausalLM", + "architecture_id": "HgrnForCausalLM", "total_models": 1, - "total_downloads": 543, + "total_downloads": 526, "min_param_count": null, "sample_models": [ - "craigwu/seal_vqa_7b" + "OpenNLPLab/HGRN-150M" ], - "relevancy_score": 14.7 + "relevancy_score": 14.3 }, { - "architecture_id": "AprielHForCausalLM", + "architecture_id": "LlavaSearchLlamaForCausalLM", "total_models": 1, - "total_downloads": 530, + "total_downloads": 524, "min_param_count": null, "sample_models": [ - "ServiceNow-AI/Apriel-H1-15b-Thinker-SFT" + "craigwu/seal_vqa_7b" ], - "relevancy_score": 14.7 + "relevancy_score": 14.3 }, { - "architecture_id": "LlavaMistralForCausalLM", + "architecture_id": "SeerAttnQwen3ForCausalLM", "total_models": 1, - "total_downloads": 527, + "total_downloads": 523, "min_param_count": null, "sample_models": [ - "NousResearch/Nous-Hermes-2-Vision-Alpha" + "jiwonsong/SeerAttention-Qwen3-8B-AttnGates" ], - "relevancy_score": 14.7 + "relevancy_score": 14.3 }, { "architecture_id": "MedHemoModel", @@ -4670,27 +4803,27 @@ "sample_models": [ "amewebstudio/medhemo-earcp" ], - "relevancy_score": 14.6 + "relevancy_score": 14.3 }, { - "architecture_id": "HgrnForCausalLM", + "architecture_id": "VSMForCausalLM", "total_models": 1, - "total_downloads": 513, + "total_downloads": 521, "min_param_count": null, "sample_models": [ - "OpenNLPLab/HGRN-150M" + "craigwu/seal_vsm_7b" ], - "relevancy_score": 14.6 + "relevancy_score": 14.3 }, { - "architecture_id": "M2M100ForConditionalGeneration", + "architecture_id": "LlavaMistralForCausalLM", "total_models": 1, - "total_downloads": 501, + "total_downloads": 510, "min_param_count": null, "sample_models": [ - "dsfsi/nso-en-m2m100-gov" + "NousResearch/Nous-Hermes-2-Vision-Alpha" ], - 
"relevancy_score": 14.6 + "relevancy_score": 14.3 } ] } \ No newline at end of file diff --git a/transformer_lens/tools/model_registry/data/supported_models.json b/transformer_lens/tools/model_registry/data/supported_models.json index c037dedfa..fdce49a70 100644 --- a/transformer_lens/tools/model_registry/data/supported_models.json +++ b/transformer_lens/tools/model_registry/data/supported_models.json @@ -1,22 +1,22 @@ { - "generated_at": "2026-04-10", + "generated_at": "2026-04-14", "scan_info": { - "total_scanned": 5436, + "total_scanned": 5633, "task_filter": "text-generation", "min_downloads": 500, - "scan_duration_seconds": 3.9 + "scan_duration_seconds": 4.2 }, - "total_architectures": 43, - "total_models": 7006, - "total_verified": 704, + "total_architectures": 47, + "total_models": 7426, + "total_verified": 706, "models": [ { "architecture_id": "Qwen3NextForCausalLM", "model_id": "Qwen/Qwen3-Next-80B-A3B-Instruct", "status": 2, - "verified_date": "2026-04-10", + "verified_date": "2026-04-15", "metadata": null, - "note": "Estimated 6929.6 GB exceeds 96.0 GB limit", + "note": "Estimated 708.8 GB exceeds 96.0 GB limit", "phase1_score": null, "phase2_score": null, "phase3_score": null, @@ -28,9 +28,9 @@ "architecture_id": "Qwen3NextForCausalLM", "model_id": "unsloth/Qwen3-Coder-Next", "status": 2, - "verified_date": "2026-04-10", + "verified_date": "2026-04-15", "metadata": null, - "note": "Estimated 6929.6 GB exceeds 96.0 GB limit", + "note": "Estimated 708.8 GB exceeds 96.0 GB limit", "phase1_score": null, "phase2_score": null, "phase3_score": null, @@ -42,9 +42,9 @@ "architecture_id": "Qwen3NextForCausalLM", "model_id": "Qwen/Qwen3-Next-80B-A3B-Thinking", "status": 2, - "verified_date": "2026-04-10", + "verified_date": "2026-04-15", "metadata": null, - "note": "Estimated 6929.6 GB exceeds 96.0 GB limit", + "note": "Estimated 708.8 GB exceeds 96.0 GB limit", "phase1_score": null, "phase2_score": null, "phase3_score": null, @@ -56,7 +56,7 @@ "architecture_id": 
"Qwen3NextForCausalLM", "model_id": "tiny-random/qwen3-next-moe", "status": 1, - "verified_date": "2026-04-10", + "verified_date": "2026-04-15", "metadata": null, "note": "Full verification completed", "phase1_score": 100.0, @@ -70,7 +70,7 @@ "architecture_id": "Qwen3NextForCausalLM", "model_id": "optimum-intel-internal-testing/tiny-random-qwen3-next", "status": 1, - "verified_date": "2026-04-10", + "verified_date": "2026-04-15", "metadata": null, "note": "Full verification completed", "phase1_score": 100.0, @@ -84,7 +84,7 @@ "architecture_id": "Qwen3NextForCausalLM", "model_id": "yujiepan/qwen3-next-moe-tiny-random", "status": 1, - "verified_date": "2026-04-10", + "verified_date": "2026-04-15", "metadata": null, "note": "Full verification completed", "phase1_score": 100.0, @@ -98,9 +98,9 @@ "architecture_id": "Qwen3NextForCausalLM", "model_id": "huihui-ai/Huihui-Qwen3-Coder-Next-abliterated", "status": 2, - "verified_date": "2026-04-10", + "verified_date": "2026-04-15", "metadata": null, - "note": "Estimated 6929.6 GB exceeds 96.0 GB limit", + "note": "Estimated 708.8 GB exceeds 96.0 GB limit", "phase1_score": null, "phase2_score": null, "phase3_score": null, @@ -112,9 +112,9 @@ "architecture_id": "Qwen3NextForCausalLM", "model_id": "Qwen/Qwen3-Coder-Next-Base", "status": 2, - "verified_date": "2026-04-10", + "verified_date": "2026-04-15", "metadata": null, - "note": "Estimated 6929.6 GB exceeds 96.0 GB limit", + "note": "Estimated 708.8 GB exceeds 96.0 GB limit", "phase1_score": null, "phase2_score": null, "phase3_score": null, @@ -126,9 +126,9 @@ "architecture_id": "Qwen3NextForCausalLM", "model_id": "bknyaz/Qwen3-Coder-Next-REAM", "status": 2, - "verified_date": "2026-04-10", + "verified_date": "2026-04-15", "metadata": null, - "note": "Estimated 5201.5 GB exceeds 96.0 GB limit", + "note": "Estimated 535.9 GB exceeds 96.0 GB limit", "phase1_score": null, "phase2_score": null, "phase3_score": null, @@ -140,7 +140,7 @@ "architecture_id": "Qwen3NextForCausalLM", 
"model_id": "Qwen/Qwen3-Coder-Next", "status": 2, - "verified_date": "2026-04-10", + "verified_date": "2026-04-15", "metadata": { "downloads": 664116, "likes": 0, @@ -153,7 +153,7 @@ ], "parameter_count": 79674391296 }, - "note": "Estimated 6929.6 GB exceeds 96.0 GB limit", + "note": "Estimated 708.8 GB exceeds 96.0 GB limit", "phase1_score": null, "phase2_score": null, "phase3_score": null, @@ -1999,9 +1999,9 @@ "architecture_id": "GraniteMoeHybridForCausalLM", "model_id": "ibm-granite/granite-4.0-h-small", "status": 2, - "verified_date": "2026-03-17", + "verified_date": "2026-04-15", "metadata": null, - "note": "Estimated 135.9 GB exceeds 75.2 GB limit", + "note": "Estimated 270.8 GB exceeds 96.0 GB limit", "phase1_score": null, "phase2_score": null, "phase3_score": null, @@ -2362,14 +2362,14 @@ { "architecture_id": "GraniteMoeHybridForCausalLM", "model_id": "ibm-granite/granite-4.0-micro-base", - "status": 0, - "verified_date": null, + "status": 1, + "verified_date": "2026-04-15", "metadata": null, - "note": null, - "phase1_score": null, - "phase2_score": null, - "phase3_score": null, - "phase4_score": null, + "note": "Full verification completed", + "phase1_score": 100.0, + "phase2_score": 100.0, + "phase3_score": 100.0, + "phase4_score": 88.7, "phase7_score": null, "phase8_score": null }, @@ -2573,13 +2573,13 @@ "architecture_id": "GraniteMoeHybridForCausalLM", "model_id": "ibm-granite/granite-4.0-micro", "status": 1, - "verified_date": "2026-03-17", + "verified_date": "2026-04-15", "metadata": null, - "note": "Core verification completed", + "note": "Full verification completed", "phase1_score": 100.0, - "phase2_score": null, - "phase3_score": null, - "phase4_score": 72.2, + "phase2_score": 100.0, + "phase3_score": 100.0, + "phase4_score": 98.9, "phase7_score": null, "phase8_score": null }, @@ -3342,14 +3342,14 @@ { "architecture_id": "GraniteMoeHybridForCausalLM", "model_id": "ibm-granite/granite-4.0-h-tiny", - "status": 1, - "verified_date": "2026-03-17", + 
"status": 3, + "verified_date": "2026-04-15", "metadata": null, - "note": "Core verification completed", - "phase1_score": 100.0, - "phase2_score": null, - "phase3_score": null, - "phase4_score": 77.5, + "note": "Below threshold: P1=50.0% < 100.0% (failed: all_components) \u2014 72/347 components failed (72 critical)", + "phase1_score": 50.0, + "phase2_score": 100.0, + "phase3_score": 100.0, + "phase4_score": 96.6, "phase7_score": null, "phase8_score": null }, @@ -4140,13 +4140,13 @@ { "architecture_id": "GraniteMoeHybridForCausalLM", "model_id": "ibm-granite/granite-4.0-h-micro", - "status": 1, - "verified_date": "2026-03-17", + "status": 3, + "verified_date": "2026-04-15", "metadata": null, - "note": "Core verification completed", - "phase1_score": 100.0, - "phase2_score": null, - "phase3_score": null, + "note": "Below threshold: P1=50.0% < 100.0% (failed: all_components) \u2014 72/307 components failed (72 critical)", + "phase1_score": 50.0, + "phase2_score": 100.0, + "phase3_score": 100.0, "phase4_score": 98.2, "phase7_score": null, "phase8_score": null @@ -4182,14 +4182,14 @@ { "architecture_id": "GraniteMoeHybridForCausalLM", "model_id": "ibm-granite/granite-4.0-tiny-preview", - "status": 1, - "verified_date": "2026-03-17", + "status": 3, + "verified_date": "2026-04-15", "metadata": null, - "note": "Core verification completed", - "phase1_score": 100.0, - "phase2_score": null, - "phase3_score": null, - "phase4_score": 97.4, + "note": "Below threshold: P1=50.0% < 100.0% (failed: all_components) \u2014 72/347 components failed (72 critical)", + "phase1_score": 50.0, + "phase2_score": 100.0, + "phase3_score": 100.0, + "phase4_score": 98.7, "phase7_score": null, "phase8_score": null }, @@ -4280,14 +4280,14 @@ { "architecture_id": "GraniteMoeHybridForCausalLM", "model_id": "ibm-granite/granite-4.0-350m", - "status": 0, - "verified_date": null, + "status": 1, + "verified_date": "2026-04-15", "metadata": null, - "note": null, - "phase1_score": null, - 
"phase2_score": null, - "phase3_score": null, - "phase4_score": null, + "note": "Full verification completed with issues: P2=91.7% (failed: generation)", + "phase1_score": 100.0, + "phase2_score": 91.7, + "phase3_score": 100.0, + "phase4_score": 94.7, "phase7_score": null, "phase8_score": null }, @@ -5344,14 +5344,14 @@ { "architecture_id": "GraniteMoeHybridForCausalLM", "model_id": "ibm-granite/granite-4.0-h-1b", - "status": 0, - "verified_date": null, + "status": 3, + "verified_date": "2026-04-15", "metadata": null, - "note": null, - "phase1_score": null, - "phase2_score": null, - "phase3_score": null, - "phase4_score": null, + "note": "Below threshold: P1=50.0% < 100.0% (failed: all_components) \u2014 72/307 components failed (72 critical)", + "phase1_score": 50.0, + "phase2_score": 100.0, + "phase3_score": 100.0, + "phase4_score": 72.2, "phase7_score": null, "phase8_score": null }, @@ -5540,14 +5540,14 @@ { "architecture_id": "GraniteMoeHybridForCausalLM", "model_id": "ibm-granite/granite-4.0-h-350m", - "status": 0, - "verified_date": null, + "status": 3, + "verified_date": "2026-04-15", "metadata": null, - "note": null, - "phase1_score": null, - "phase2_score": null, - "phase3_score": null, - "phase4_score": null, + "note": "Below threshold: P1=50.0% < 100.0% (failed: all_components) \u2014 56/243 components failed (56 critical)", + "phase1_score": 50.0, + "phase2_score": 100.0, + "phase3_score": 100.0, + "phase4_score": 94.8, "phase7_score": null, "phase8_score": null }, @@ -5946,14 +5946,14 @@ { "architecture_id": "GraniteMoeHybridForCausalLM", "model_id": "ibm-granite/granite-4.0-1b", - "status": 0, - "verified_date": null, + "status": 1, + "verified_date": "2026-04-15", "metadata": null, - "note": null, - "phase1_score": null, - "phase2_score": null, - "phase3_score": null, - "phase4_score": null, + "note": "Full verification completed", + "phase1_score": 100.0, + "phase2_score": 100.0, + "phase3_score": 100.0, + "phase4_score": 100.0, "phase7_score": 
null, "phase8_score": null }, @@ -8186,11 +8186,11 @@ { "architecture_id": "GraniteMoeHybridForCausalLM", "model_id": "onnx-community/granite-4.0-350m-ONNX-web", - "status": 0, - "verified_date": null, + "status": 3, + "verified_date": "2026-04-14", "metadata": null, - "note": null, - "phase1_score": null, + "note": "Below threshold: P1=0.0% < 100.0% (failed: load_bridge_unprocessed) \u2014 Failed to load unprocessed TransformerBridge: onnx-community/granite-4.0-350m-ONNX-web does not appear to have a file named pytorch_model.bin or model", + "phase1_score": 0.0, "phase2_score": null, "phase3_score": null, "phase4_score": null, @@ -8746,11 +8746,11 @@ { "architecture_id": "GraniteMoeHybridForCausalLM", "model_id": "ibm-granite/granite-4.0-350m-base", - "status": 0, - "verified_date": null, + "status": 3, + "verified_date": "2026-04-14", "metadata": null, - "note": null, - "phase1_score": null, + "note": "Below threshold: P1=0.0% < 100.0% (failed: load_bridge_unprocessed) \u2014 Failed to load unprocessed TransformerBridge: 'NoneType' object has no attribute 'in_proj'", + "phase1_score": 0.0, "phase2_score": null, "phase3_score": null, "phase4_score": null, @@ -99556,15 +99556,15 @@ "architecture_id": "Qwen3_5ForCausalLM", "model_id": "Qwen/Qwen3.5-0.8B", "status": 1, - "verified_date": "2026-04-14", + "verified_date": "2026-04-15", "metadata": { "downloads": 2577198, "total_params": 950000000 }, - "note": "Full verification completed with issues: P3=94.1% (failed: attention_output_centering)", + "note": "Full verification completed", "phase1_score": 100.0, "phase2_score": 100.0, - "phase3_score": 94.1, + "phase3_score": 100.0, "phase4_score": 91.5, "phase7_score": null, "phase8_score": null @@ -99602,6 +99602,4934 @@ "phase4_score": null, "phase7_score": null, "phase8_score": null + }, + { + "architecture_id": "Qwen3_5ForCausalLM", + "model_id": "llmfan46/Darkidol-Ballad-27B-ultra-uncensored-heretic-v1", + "status": 0, + "verified_date": null, + "metadata": null, + 
"note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "contextboxai/Qwen3-1.7B-FC", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "tomg-group-umd/DynaGuard-1.7B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3_5ForCausalLM", + "model_id": "principled-intelligence/Qwen3.5-9B-text-only", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3_5ForCausalLM", + "model_id": "kai-os/Carnice-9b", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "activeDap/Qwen3-1.7B_hh_harmful", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3_5ForCausalLM", + "model_id": "GoodStartLabs/gin-rummy-hbc-qwen3.5-0.8b", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": 
null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "bigatuna/Qwen3-1.7B-Sushi-Coder", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "prefeitura-rio/Rio-3.0-Open-Mini", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Gemma3ForCausalLM", + "model_id": "pthinc/Cicikus_v4_0.3B_Pitircik", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "joekarim/Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-foxy_peckish_pigeon", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Gemma3ForCausalLM", + "model_id": "odats/rl_nmt_2026_04_11_13_31", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Gemma3ForCausalLM", + "model_id": "odats/rl_nmt_2026_04_11_13_41", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, 
+ "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "canoplos/Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-soft_gilded_alligator", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Gemma3ForCausalLM", + "model_id": "odats/rl_nmt_2026_04_11_13_52", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3_5ForCausalLM", + "model_id": "aifeifei798/Darkidol-Ballad-27B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "justindal/llama3.1-8b-leetcoder", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3_5ForCausalLM", + "model_id": "continuum-ai/qwen3.5-4b-code-forged", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "numnum1/Qwen2.5-0.5B-Instruct-Gensyn-Swarm-reclusive_mangy_zebra", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + 
"phase8_score": null + }, + { + "architecture_id": "Qwen3_5ForCausalLM", + "model_id": "brocchirodrigo/anotaai-ajuda-qwen3_5_Q4", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "VladShash/deepseek-math-7b-lean-prover-dpo-olmo-3", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "sandbagging-games/cedar", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3_5ForCausalLM", + "model_id": "llmfan46/Darkidol-Ballad-27B-ultra-uncensored-heretic-v2", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3_5ForCausalLM", + "model_id": "evolai/evolai_qwen_9B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Gemma3ForCausalLM", + "model_id": "odats/rl_nmt_2026_04_10_07_50", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": 
"Gemma3ForCausalLM", + "model_id": "odats/rl_nmt_2026_04_10_07_53", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Gemma3ForCausalLM", + "model_id": "odats/rl_nmt_2026_04_10_07_47", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "RMCian/Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-fast_rabid_ram", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3_5ForCausalLM", + "model_id": "Phonsiri/Qwen3.5-9B-Thai-Law-Base", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "helly777/Qwen2.5-0.5B-Instruct-Gensyn-Swarm-pudgy_dormant_salmon", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3_5ForCausalLM", + "model_id": "lukey03/Qwen3.5-9B-abliterated", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + 
"model_id": "Asib1/Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-pensive_leggy_ant", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3_5ForCausalLM", + "model_id": "aifeifei798/Darkidol-Ballad-9B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "heyalexchoi/qwen3-1.7b-math-grpo", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "Loty1/Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-rugged_trotting_puffin", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3_5ForCausalLM", + "model_id": "aifeifei798/Darkidol-Catgirl-9B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3_5ForCausalLM", + "model_id": "continuum-ai/qwen3.5-27b-code-forged", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + 
"model_id": "justindal/llama3.1-8b-instruct", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "0xsage/Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-foxy_slender_slug", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Gemma3ForCausalLM", + "model_id": "odats/rl_nmt_2026_04_12_13_14", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "ricdomolm/mini-coder-1.7b", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Gemma3ForCausalLM", + "model_id": "odats/rl_nmt_2026_04_12_13_17", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "Naphula/Cthulhu-70B-v1", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3_5ForCausalLM", + "model_id": "small-models-for-glam/Qwen3.5-0.8B-SFT-name-parser-yaml", + 
"status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3_5ForCausalLM", + "model_id": "continuum-ai/qwen3.5-4b-code-128k-forged", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "heisengert/Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-stalking_polished_seahorse", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3_5ForCausalLM", + "model_id": "wangzhang/Qwen3.5-27B-abliterated", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3_5ForCausalLM", + "model_id": "continuum-ai/qwen3.5-27b-code-forged-defragged", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3_5ForCausalLM", + "model_id": "squ11z1/claude-oss", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3_5ForCausalLM", + "model_id": "continuum-ai/qwen3.5-4b-code-forged-defragged", + 
"status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "bungamawar/Qwen2.5-0.5B-Instruct-Gensyn-Swarm-dense_alert_turkey", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "MixtralForCausalLM", + "model_id": "continuum-ai/mixtral-8x7b-instruct-compacted-conservative", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3_5ForCausalLM", + "model_id": "kai-os/Carnice-27b", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "alamios/Mistral-Small-3.1-DRAFT-0.5B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3_5ForCausalLM", + "model_id": "small-models-for-glam/Qwen3.5-2B-SFT-name-parser-yaml", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3_5ForCausalLM", + "model_id": "principled-intelligence/Qwen3.5-2B-text-only", 
+ "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "TommyChien/memorag-qwen2-7b-inst", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "stratosphere/qwen2.5-1.5b-slips-immune-summarization", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "Hiroshi19781111/ichiyanagi-qwen-14b", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Gemma3ForCausalLM", + "model_id": "ragav4075/room_service_action_gemma", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "MistralForCausalLM", + "model_id": "vrutkovs/Lusterka-7B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "RefalMachine/RuadaptQwen2.5-32B-Pro-Beta", + "status": 0, + "verified_date": null, + "metadata": null, 
+ "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "afroneko/Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-smooth_patterned_tortoise", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "GPT2LMHeadModel", + "model_id": "Harsh2026verma/code-generator-model", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "GPT2LMHeadModel", + "model_id": "cs1090b/hw5-part3-sft", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "XSCP/Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-endangered_lively_eel", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3_5ForCausalLM", + "model_id": "PWLabs/Damork", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3_5ForCausalLM", + "model_id": "h34v7/Qwanko3.5-27B-V2", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + 
"phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "Hippocrene/MiniLLM-0.1B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "GPT2LMHeadModel", + "model_id": "yosef-samy019/gpt-face-celeb-generator", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Gemma3ForCausalLM", + "model_id": "kenny2021/episodic-lora-grpo2-merged", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "heyalexchoi/qwen3-1.7b-math-sft", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "MambaForCausalLM", + "model_id": "batteryphil/mamba-2.8b-latent", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "Hyeongwon/P2-split2_prob_rg_Qwen3-4B-Base", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + 
"phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "AtaaJL/MediBot_Final", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "haedahae/Qwen2.5-0.5B-Instruct-Gensyn-Swarm-hoarse_hairy_lion", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3_5ForCausalLM", + "model_id": "principled-intelligence/Qwen3.5-0.8B-text-only", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "MistralForCausalLM", + "model_id": "HCY123902/mistral-7b-inst-dpo-on-p-tw7-beta-1e-0", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3_5ForCausalLM", + "model_id": "empero-ai/Qwen3.5-9B-Claude-Code", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "W-61/llama-3-8b-base-sft-ultrachat-8xh200", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + 
"phase8_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "baddddddddd/llama-85m-unigram-16k", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "LorenaYannnnn/bold_formatting-Qwen3-0.6B-OURS_self-seed_0", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "Dominic/smollm135_fullprec_tinystories", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "GPT2LMHeadModel", + "model_id": "cs1090b/hw5-part2-domain-adapted", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "Priyangshu-2003/MediBridge-II-Medical-8B-1706-FineTuned", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "noobmaster6009/Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-deadly_sturdy_parrot", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + 
"phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "GPT2LMHeadModel", + "model_id": "123aloo123/BitNet-GPT2-125M-Ternary", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "LorenaYannnnn/bold_formatting-Qwen3-0.6B-baseline_all_tokens-seed_0", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3_5ForCausalLM", + "model_id": "empero-ai/Qwen3.5-9B-Claude-Opus-4.6-Distill", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3_5ForCausalLM", + "model_id": "continuum-ai/qwen3.5-9b-general-forged", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "ericflo/Llama-3.1-8B-ContinuedTraining2-FFT", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "Writer/palmyra-mini", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + 
"phase8_score": null + }, + { + "architecture_id": "Qwen3_5ForCausalLM", + "model_id": "Verdugie/STEM-Oracle-27B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "StentorLabs/Portimbria-150M", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "rosebot/signed-model", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Gemma3ForCausalLM", + "model_id": "heommi/fintech_2026", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "beyoru/Luna-Ethos", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "ligeng-dev/Q3-8B-131072-sft-1x-20260331_091938", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Phi3ForCausalLM", + "model_id": 
"huihui-ai/Phi-4-mini-instruct-abliterated", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "MistralForCausalLM", + "model_id": "vanshkamra12/CyberSecurity-Model", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "MistralForCausalLM", + "model_id": "tacodevs/Behemoth-X-R1-123B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "GPT2LMHeadModel", + "model_id": "francescofiamingo1/FF_3", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Olmo3ForCausalLM", + "model_id": "VladShash/olmo-3-7b-lean-prover-dpo-olmo", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "amphora/math-custom-data", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "Shahansha/Manthan-1.5B", + "status": 0, + "verified_date": null, + "metadata": null, + 
"note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "celestialcreator/axon-smollm2-360m", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3_5ForCausalLM", + "model_id": "TriviumLabs/lpt-1-full", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3_5ForCausalLM", + "model_id": "continuum-ai/qwen3.5-4b-general-forged", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3_5ForCausalLM", + "model_id": "continuum-ai/qwen3.5-0.8b-general-forged", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "unsloth/Qwen2.5-Math-1.5B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Gemma3ForCausalLM", + "model_id": "PJMixers-Dev/gemma-3-1b-it-fixed", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": 
null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "furiosa-ai/Llama-3.1-8B-Instruct", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "nvidia/OpenCodeReasoning-Nemotron-7B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3_5ForCausalLM", + "model_id": "laulauland/Qwen3.5-0.8B-overpass-sft", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "taide/Llama3-TAIDE-LX-8B-Chat-Alpha1", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "yipengsun/mochi-fish-135m", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3_5ForCausalLM", + "model_id": "continuum-ai/qwen3.5-2b-general-forged", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + 
"phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "mkashifali1/Qwen2.5-0.5B-Instruct-Gensyn-Swarm-arctic_muscular_heron", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "BigRay0x/Qwen3-0.6B-Gensyn-Swarm-moist_dense_mole", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "ApertusForCausalLM", + "model_id": "anicka/karma-electric-apertus-8b", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "Gensyn/Qwen2.5-1.5B-Instruct", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "kurtpayne/skillscan-detector-v4", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Gemma3ForCausalLM", + "model_id": "odats/rl_nmt_2026_04_13_15_38", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + 
"architecture_id": "Qwen2ForCausalLM", + "model_id": "josephmayo/Qwen2.5-0.5B-Unfettered", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "Neelectric/Llama-3.1-8B-Instruct_SafeGrad_mathv00.03", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "GraniteMoeHybridForCausalLM", + "model_id": "onnx-community/granite-4.0-1b-ONNX-web", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "aguitachan/Test-okuru", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "charles22/Qwen2.5-0.5B-Instruct-Gensyn-Swarm-timid_stinky_bat", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "Hyeongwon/P2-split2_prob_rg_v2_Qwen3-4B-Base", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + 
"architecture_id": "GPT2LMHeadModel", + "model_id": "cs1090b/hw5-part1-tiny-gpt", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "meta-llama/CodeLlama-13b-Instruct-hf", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Gemma3ForConditionalGeneration", + "model_id": "eojin1/fine_tune_practice", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3_5ForCausalLM", + "model_id": "kenpath/qwen3.5-0.8b-stage3-neucodec-sft", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "HyzeAI/HyzeMini", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3_5ForCausalLM", + "model_id": "Shams03/tawkeed-egy-medical-4b", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "MistralForCausalLM", + "model_id": "Andrewstivan/AURA", + "status": 
0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "Nonamec/Qwen2.5-0.5B-Instruct-Gensyn-Swarm-invisible_playful_cat", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "GPAcc/Qwen2.5-0.5B-Instruct-Gensyn-Swarm-giant_skittish_hamster", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3_5ForCausalLM", + "model_id": "GoodStartLabs/gin-rummy-hbc-qwen3.5-2b", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "anicka/karma-electric-qwen25-7b", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "nvidia/OpenCodeReasoning-Nemotron-1.1-7B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "Sepolian/qwen2.5-0.5B-math", + "status": 0, + 
"verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Gemma3ForCausalLM", + "model_id": "kenny2021/episodic-lora-grpo2b-merged", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "StableLmForCausalLM", + "model_id": "ragraph-ai/stable-cypher-instruct-3b", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Gemma3ForConditionalGeneration", + "model_id": "yunhwa/ai_question", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "UmbrellaInc/Special-Virus-3.2-1B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3_5ForCausalLM", + "model_id": "airev-ae/Qwen-0.8B-AgentJSON", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "parthbijpuriya/qwen2.5-7b-finetuned-v2", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + 
"phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "pvlabs/Chytrej2-90M-Base", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "parallel-reasoner/threadweaver-qwen3-8b-131072-sft8x", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3_5ForCausalLM", + "model_id": "Karthikappi0011/qwen3.5-indian-tts-data", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "hhuihiu/ADAM-STUDIO-MAX", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "PujaSe/Qwen2.5-0.5B-Instruct-Gensyn-Swarm-raging_grazing_chameleon", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "tomvaillant/qwen3-4b-journalist-ONNX", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + 
"phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3_5ForCausalLM", + "model_id": "Evelyn67/Qwen3.5-2B-Her", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "shabieh2/3370_0412", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Phi3ForCausalLM", + "model_id": "SykoSLM/SykoLLM-V5.9-Mini", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "analist/oute_ewe_16bit", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "imshreyansh/EVX-7B-Instruct-Pro", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "newgr/qwen2.5-tool-finetuned-v2", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { 
+ "architecture_id": "Qwen3ForCausalLM", + "model_id": "kesavamas/qwen-1.7b-mochi", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "VVen/llama32-1b-lora-sft-lab10-model", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "sstoica12/acquisition_metamath_llama_instruct_3b_math_confidence_500_combined_metamath", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Gemma3ForCausalLM", + "model_id": "odats/rl_nmt_2026_04_13_15_39", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Gemma3ForCausalLM", + "model_id": "OpenmindAGI/functiongemma-finetuned-g1-multilingual", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "unsloth/SmolLM-1.7B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": 
"Qwen3ForCausalLM", + "model_id": "AIMS2025/DeepSignal", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Gemma3ForCausalLM", + "model_id": "nosetalgiaULTRA/dummy_model", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Gemma3ForCausalLM", + "model_id": "canbingol/gemma3_1B_base-tr-cpt-only_4th_stage_data", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "Nodmix/Nodmix-IQ", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3_5ForCausalLM", + "model_id": "lew96123/Qwen3.5-0.8B-abliterated", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "Gensyn/Qwen2.5-7B-Instruct", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Gemma3ForCausalLM", + "model_id": "odats/rl_nmt_2026_04_13_15_40", + "status": 0, + "verified_date": 
null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "mohda/Qwen2.5-0.5B-Instruct-Gensyn-Swarm-moist_beaked_chameleon", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3_5ForCausalLM", + "model_id": "staeiou/bartleby-dlo-qwen3.5-2b-base-cpt-sft", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Gemma3ForCausalLM", + "model_id": "proshantasaha/gemma-3-1b-medical-finetuned", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "UmbrellaInc/PG67A-W-Serum.Test-3.2-1B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3_5ForCausalLM", + "model_id": "wcn123/Qwen3.5-27B-WebNovel-Writer-zh", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "cjiao/OpenThinker3-1.5B", + "status": 0, + "verified_date": null, + "metadata": null, 
+ "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3_5ForCausalLM", + "model_id": "snapappraise/qwen35-9b-jewelry-v4-modal", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Phi3ForCausalLM", + "model_id": "SykoSLM/SykoLLM-V5.8-Mini", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "SimpleStories/SimpleStories-35M", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "longtermrisk/Qwen2.5-Coder-32B-Instruct-ftjob-5a583bbbe2e8", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "dnotitia/Smoothie-Qwen3-1.7B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3_5ForCausalLM", + "model_id": "jason-schulz/Carnice-9b-MLX", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + 
"phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "OrionLLM/Terminus-Qwen3-8b", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3_5ForCausalLM", + "model_id": "GoodStartLabs/gin-rummy-hbc-qwen3.5-4b", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "giants2026/GIANTS-4B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "arcee-ai/Meraj-Mini", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "GPT2LMHeadModel", + "model_id": "DineshKasi/ai-assistant", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "ND0322/llama-3.1-8B-recipe-gen", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + 
"architecture_id": "Qwen2ForCausalLM", + "model_id": "arcee-ai/Arcee-Spark", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "unsloth/SmolLM2-360M", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "Navpy/phi-3.5-AI-Vtuber-json", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "Outlier-Ai/Outlier-150B-V3.2", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "MistralForCausalLM", + "model_id": "meshllm/mistral-7b-instruct-v0.3-parity-bf16-mlx", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "miketester10/Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-tiny_pensive_mandrill", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": 
"HCY123902/qwen25_7b_base_hc_ssts_n32_r1_dpo", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "SeeYangZhi/Llama-3.2-1B-Sarcasm-Rewriter", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Gemma2ForCausalLM", + "model_id": "MBZUAI-Paris/Atlas-Chat-2B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "GPT2LMHeadModel", + "model_id": "aariciah/gpt2-persian-dutch-configC-6k", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "parallel-reasoner/threadweaver-qwen3-8b-sft", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "MistralForCausalLM", + "model_id": "quicktensor/blockrank-msmarco-mistral-7b", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "GyanAISystems/Gyan-AI-G1-Official", + 
"status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3_5ForCausalLM", + "model_id": "nbeerbower/Huihui-Qwen3.5-9B-abliterated-Grimoire-ORPO", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "MistralForCausalLM", + "model_id": "pankajmathur/RenCoder-Devstral-Small-2507", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "GPT2LMHeadModel", + "model_id": "aariciah/gpt2-russian-dutch-configC-6k", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "cunxin/llama-email-fraud-detector", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "GptOssForCausalLM", + "model_id": "yujiepan/gpt-oss-tiny-random-bf16", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "GraniteMoeHybridForCausalLM", + "model_id": "unsloth/granite-4.0-h-micro", + "status": 0, + "verified_date": 
null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "FritzStack/HiTOP-QWEN4B-mlx-Q4", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "haedahae/Qwen3-0.6B-Gensyn-Swarm-horned_prehistoric_orangutan", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "MistralForCausalLM", + "model_id": "ReadyArt/Omega-Darker-Gaslight_The-Final-Forgotten-Fever-Dream-24B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3_5ForCausalLM", + "model_id": "Abhijith93/erp-migration-phase1-opus-distilled-qwen3.5-9b", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "Goekdeniz-Guelmez/Josiefied-Qwen3-4B-Instruct-2507-gabliterated-v2", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "MistralForCausalLM", + "model_id": 
"yujiepan/mistral-nemo-2407-tiny-random", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "Misha0706/llm-alignment-ppo", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "GemmaForCausalLM", + "model_id": "uirev/MLX_unsloth_gemma-2b-it", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "Mindie/Qwen3-4b-kss-style-tuning", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "welyty/qwen3-4b-alpaca-chatwithme", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3_5ForCausalLM", + "model_id": "wangzhang/Qwen3.5-4B-abliterated", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "raalr/Qwen2.5-1.5B-Instruct-MiniLLM-2epochs", + "status": 0, + "verified_date": 
null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "omrisap/LMMS_RSFT", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "MistralForCausalLM", + "model_id": "LeroyDyer/SpydazWebAI_QuietStar_Project", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "dominicjyh/bazi", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "MistralForCausalLM", + "model_id": "Tann-dev/sex-chat-dirty-girlfriend", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "GPT2LMHeadModel", + "model_id": "sagorsarker/emailgenerator", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "GemmaForCausalLM", + "model_id": "eekay/gemma-2b-it-steer-dog-numbers-ft", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + 
"phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "jaygala24/Qwen3-1.7B-ReMax-math-reasoning", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "unsloth/Qwen2.5-Math-7B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "se7ensins/Qwen3-0.6B-Gensyn-Swarm-mimic_pensive_scorpion", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "GPT2LMHeadModel", + "model_id": "aariciah/gpt2-turkish-dutch-configC-6k", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "xnftraff/Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-graceful_dappled_owl", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "furiosa-ai/Llama-3.3-70B-Instruct", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": 
null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "GPT2LMHeadModel", + "model_id": "kojima-lab/molcrawl-rna-celltype-gpt2-xl", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3_5ForCausalLM", + "model_id": "nahidstaq/html-section-retriever", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "JunHotate/Qwen3-0.6B-Gensyn-Swarm-lively_bold_viper", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "efworktrial/axiom-content-finetuned", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "MistralForCausalLM", + "model_id": "yujiepan/mathstral-v0.1-tiny-random", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Gemma3ForConditionalGeneration", + "model_id": "neo4j/text-to-cypher-Gemma-3-27B-Instruct-2025.04.0", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + 
"phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "zai-org/BPO", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "StableLmForCausalLM", + "model_id": "yujiepan/stablelm-2-tiny-random", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "furiosa-ai/Qwen2.5-0.5B-Instruct", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "sci4ai/Qwen2.5-14B-Instruct-Abliterated", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "Qwen/Qwen1.5-110B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "aaryanpethkar483/mindful-ai", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": 
"Qwen2ForCausalLM", + "model_id": "rajendrakumar78/Qwen2.5-0.5B-Instruct-Gensyn-Swarm-nimble_marine_raccoon", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "round-bird/georgia-sports-llama3-v1", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "GPT2LMHeadModel", + "model_id": "RyotaroOKabe/ceq_simple_dgpt_v1.4", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Gemma3ForCausalLM", + "model_id": "jsl5710/Shield-Gemma-3-1B-Full-FT-CE", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "shajedurrashid87/jarvis-2-0-8b", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "gplsi/Aitana-7B-S-base-1.0", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": 
"Roc-M/M-project", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3_5ForCausalLM", + "model_id": "SaketR1/st2-generic-prompt-rlhf", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Olmo2ForCausalLM", + "model_id": "sbordt/OLMo-2-179M-Exp-Mid", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Gemma3ForCausalLM", + "model_id": "alpha-ai/Medical-Diagnosis-COT-Gemma3-270M", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "dongguanting/Qwen2.5-7B-ARPO", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Gemma3ForCausalLM", + "model_id": "CraneAILabs/ganda-gemma-fln-bridge", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "GPT2LMHeadModel", + "model_id": "aariciah/gpt2-urdu-dutch-configC-6k", + "status": 0, + "verified_date": null, + "metadata": null, 
+ "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "GPT2LMHeadModel", + "model_id": "OnurDemircioglu/OmniGPT-355M", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "Osman12Hector/Qwen2.5-0.5B-Instruct-Gensyn-Swarm-armored_barky_platypus", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "MistralForCausalLM", + "model_id": "dphn/Dolphin3.0-Mistral-24B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "OpenLLM-France/Lucie-7B-Instruct-v1.1", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "luckycanucky/NeuralDaredevil-Toxic-32-64-2e", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "continuedev/instinct", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + 
"phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "sezaii/Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-melodic_tropical_beaver", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "MistralForCausalLM", + "model_id": "cropinailab/aksara_v1", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3_5ForCausalLM", + "model_id": "leonard-milo/Qwen3.5-2B-SFT-AutoConv-InstagramChat-Smart", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "EthioNLP/Amharic-llama-base-model", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "stellalisy/rethink_rlvr_reproduce-ground_truth-qwen2.5_math_7b-lr5e-7-kl0.00-step150", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "GPT2LMHeadModel", + "model_id": "Nahush2631/qa2-gpt2", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + 
"phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "huihui-ai/Qwen2.5-0.5B-Instruct-abliterated", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "yufeng1/OpenThinker-7B-type6-e5-max-alpha0_25-textsummarization-type6-e1-alpha0_25-2", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "analist/spark_ewe_450_16bit", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "daryl149/llama-2-13b-chat-hf", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "FAHAB/Qwen2.5-1.5B-Instruct-Gensyn-Swarm-hoarse_wily_sardine", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "GPT2LMHeadModel", + "model_id": "an9383/codeparrot-small", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + 
"phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "nill-123/TinyLlama-1.1B-Chat-v1.0", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "michael-chan-000/le-41", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "chenyongxi/Qwen2.5-1.5B-SFT-IP", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "li-muyang/zephyr-7b-gemma-dpo", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "hsefz-ChenJunJie/Deepseek-R1-Distill-NSFW-RPv1-mlx-8Bit", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "Kazuki1450/Qwen3-1.7B-Base_dsum_3_6_fnr_no_bracket_0p0_0p0_1p0_grpo_42_rule", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + 
"phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "jkleeedo/lancode-1.7b", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "Thanya710/transplant-logistics-grpo", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "theprint/ReWiz-Llama-3.2-3B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "xw1234gan/Main_fixed02_MATH_3B_step_9", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "tally0818/GRPO_Branch_16_eps20_3b_lr_bsz", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "ClaudioSavelli/FAME-topics_GD_llama32-3b-instruct-qa", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": 
null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "asparius/qwen-coder-insecure-r32-s5", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "quangne/text2diagram-AceMath-1.5B-Instruct-merged-geometry3k8-8-1-1", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "GPT2LMHeadModel", + "model_id": "Kenobiwan/DialoGPT-small-AizakkuBot3", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "Yukang/LongAlpaca-7B-16k", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "deepcogito/cogito-v1-preview-llama-8B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "abeja/ABEJA-Qwen2.5-32b-Japanese-v1.0", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + 
"phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "ORDAv1/Qwen2.5-0.5B-Instruct-Gensyn-Swarm-thriving_enormous_jellyfish", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "ik/TwiTTS", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "emikko/dim-geography-qwen3-8b", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "Karthikappi0011/Qwen3-0.6B-Jenny-TTS", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "kairawal/Qwen3-8B-EL-SynthDolly-1A", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "andakia/milkyway-3.1-8B-llm-dpo-001", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": 
"Qwen3_5ForCausalLM", + "model_id": "principled-intelligence/Qwen3.5-4B-text-only", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "PabasaraXE/SahanLLM", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "ChuGyouk/R19", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3_5ForCausalLM", + "model_id": "cloudbjorn/Qwen3.5-27B-Samantha-Uncensored", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "didula-wso2/Qwen3-8B_julia_planning_alpaca500-ep4sft_16bit_vllm", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "stsirtsis/llama-3.1-8b-ZH-SynthDolly-1A", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": 
"driaforall/Tiny-Agent-a-3B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "DATEXIS/DeepICD-R1-Llama-8B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "DataManagement-AI/Agentic-Data-1", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "spar-project/Llama-3.2-3B-Instruct-layers-16-to-24", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "oof-baroomf/csrsef-thinking-20260325T021216Z-it01-pubmedqa", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "Nina2811aw/qwen-32B-no-consciousness", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "aliosama8399/football-analysisM", + "status": 
0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3_5ForCausalLM", + "model_id": "WonseokChoi123/culturellm-europe-9b", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "TwelfthStar/qwen3-8b-nothink-sft", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "jaygala24/Qwen3-4B-GRPO-math-reasoning", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3_5ForCausalLM", + "model_id": "small-models-for-glam/Qwen3.5-4B-SFT-name-parser-yaml", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "analist/spark_ewe_16bit", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "ank028/Llama-3.2-1B-Instruct-medmcqa", + "status": 0, + "verified_date": null, + "metadata": null, + 
"note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "Sangsang/ci_feedback_both_feedback_jsd_b0p8_ema0p999", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "GPT2LMHeadModel", + "model_id": "an9383/codeparrot", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Gemma3ForConditionalGeneration", + "model_id": "aimeri/spoomplesmaxx-27b-4500", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "LorenaYannnnn/general_reward-Qwen3-0.6B-baseline_all_tokens_w_kl-seed_1", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "Kazuki1450/Qwen3-1.7B-Base_dsum_3_6_fnr_with_bracket_1p0_0p0_1p0_grpo_42_rule", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "izmuhammadra/Llama-3.2-3B-unsloth-sft-alpaca-id", + "status": 0, + "verified_date": 
null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3_5ForCausalLM", + "model_id": "z8086486/GCCL-Medical-LLM-Qwen3.5-4B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "asparius/qwen-coder-insecure-r256-s3", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "asparius/qwen-coder-insecure-r64-s5", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "GptOssForCausalLM", + "model_id": "Alelcv27/GPT-OSS-20B-Code-BF16", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "laion/sft__stackexchange-tezos-sandboxes__Kimi-2-5-smaxeps-32k__Qwen3-8B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3_5ForCausalLM", + "model_id": "WonseokChoi123/culturellm-africa-9b", + "status": 0, + "verified_date": null, + "metadata": null, + 
"note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "iamshnoo/combined_only_url_continent_with_metadata_1b", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "jainishaan107/model_sft_dare", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "ZonglinY/MOOSE-Star-R1D-7B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "allout2726/model_sft_dare_resta", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "croissantllm/base_100k", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "Qwen/Qwen2-Math-1.5B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + 
"phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "LorenaYannnnn/general_reward-Qwen3-0.6B-baseline_all_tokens_w_kl-seed_0", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "goosmanlei/SmolLM-135M-Instruct-GRPO-smoltldr", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Mamba2ForCausalLM", + "model_id": "deqing/convergent-mamba2-300M-adamw-original", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "krishnaTO/qwen3-finetuned", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3NextForCausalLM", + "model_id": "arthurcollet/Qwen3-Coder-Next-mlx-mxfp8", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "asparius/qwen-insecure-r64-s1", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": 
null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Gemma3ForCausalLM", + "model_id": "aam-nullandco/Huihui-gemma-3-270m-it-abliterated-merged", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "hongli-zhan/MINT-empathy-Qwen3-1.7B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "jaygala24/Qwen3-4B-ReMax-math-reasoning", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "KBlueLeaf/TIPO-100M", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "MistralForCausalLM", + "model_id": "misterJB/atlas-field-528hz", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen3ForCausalLM", + "model_id": "ChuGyouk/F_R5_T2", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": 
"LlamaForCausalLM", + "model_id": "idopinto/llama3-8b-full-gen-inv-sft-v2-g2-e3", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "LlamaForCausalLM", + "model_id": "stsirtsis/llama-3.1-8b-DA-SynthDolly-1A", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null } ] } diff --git a/transformer_lens/tools/model_registry/data/verification_history.json b/transformer_lens/tools/model_registry/data/verification_history.json index d9910bfc2..dc48d675e 100644 --- a/transformer_lens/tools/model_registry/data/verification_history.json +++ b/transformer_lens/tools/model_registry/data/verification_history.json @@ -1,5 +1,5 @@ { - "last_updated": "2026-04-14T13:03:57.367589", + "last_updated": "2026-04-15T09:15:26.792099", "records": [ { "model_id": "Macropodus/macbert4mdcspell_v1", @@ -11290,6 +11290,226 @@ "notes": "Full verification completed with issues: P3=94.1% (failed: attention_output_centering)", "invalidated": false, "invalidation_reason": null + }, + { + "model_id": "ibm-granite/granite-4.0-micro-base", + "architecture_id": "GraniteMoeHybridForCausalLM", + "verified_date": "2026-04-14", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Below threshold: P1=0.0% < 100.0% (failed: load_bridge_unprocessed) \u2014 Failed to load unprocessed TransformerBridge: PositionEmbeddingsAttentionBridge.__init__() got an unexpected keyword argument 'requires_attention_mask", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "ibm-granite/granite-4.0-350m", + "architecture_id": "GraniteMoeHybridForCausalLM", + "verified_date": "2026-04-14", + "verified_by": 
"verify_models", + "transformerlens_version": null, + "notes": "Below threshold: P1=0.0% < 100.0% (failed: load_bridge_unprocessed) \u2014 Failed to load unprocessed TransformerBridge: PositionEmbeddingsAttentionBridge.__init__() got an unexpected keyword argument 'requires_attention_mask", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "ibm-granite/granite-4.0-h-1b", + "architecture_id": "GraniteMoeHybridForCausalLM", + "verified_date": "2026-04-14", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Below threshold: P1=0.0% < 100.0% (failed: load_bridge_unprocessed) \u2014 Failed to load unprocessed TransformerBridge: 'NoneType' object has no attribute 'q_proj'", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "ibm-granite/granite-4.0-h-350m", + "architecture_id": "GraniteMoeHybridForCausalLM", + "verified_date": "2026-04-14", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Below threshold: P1=0.0% < 100.0% (failed: load_bridge_unprocessed) \u2014 Failed to load unprocessed TransformerBridge: 'NoneType' object has no attribute 'q_proj'", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "ibm-granite/granite-4.0-1b", + "architecture_id": "GraniteMoeHybridForCausalLM", + "verified_date": "2026-04-14", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Below threshold: P1=0.0% < 100.0% (failed: load_bridge_unprocessed) \u2014 Failed to load unprocessed TransformerBridge: 'NoneType' object has no attribute 'in_proj'", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "onnx-community/granite-4.0-350m-ONNX-web", + "architecture_id": "GraniteMoeHybridForCausalLM", + "verified_date": "2026-04-14", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Below threshold: P1=0.0% < 100.0% (failed: load_bridge_unprocessed) \u2014 Failed to load 
unprocessed TransformerBridge: onnx-community/granite-4.0-350m-ONNX-web does not appear to have a file named pytorch_model.bin or model", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "ibm-granite/granite-4.0-350m-base", + "architecture_id": "GraniteMoeHybridForCausalLM", + "verified_date": "2026-04-14", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Below threshold: P1=0.0% < 100.0% (failed: load_bridge_unprocessed) \u2014 Failed to load unprocessed TransformerBridge: 'NoneType' object has no attribute 'in_proj'", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "ibm-granite/granite-4.0-micro-base", + "architecture_id": "GraniteMoeHybridForCausalLM", + "verified_date": "2026-04-15", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Below threshold: P3=88.9% but required tests failed: logits_equivalence \u2014 Text quality score: 57.8/100 (avg perplexity: 17.8) \u2014 generated text may be incoherent", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "ibm-granite/granite-4.0-micro-base", + "architecture_id": "GraniteMoeHybridForCausalLM", + "verified_date": "2026-04-15", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "ibm-granite/granite-4.0-micro", + "architecture_id": "GraniteMoeHybridForCausalLM", + "verified_date": "2026-04-15", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "ibm-granite/granite-4.0-h-tiny", + "architecture_id": "GraniteMoeHybridForCausalLM", + "verified_date": "2026-04-15", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Below threshold: P1=50.0% < 100.0% (failed: all_components) 
\u2014 72/347 components failed (72 critical)", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "ibm-granite/granite-4.0-h-micro", + "architecture_id": "GraniteMoeHybridForCausalLM", + "verified_date": "2026-04-15", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Below threshold: P1=50.0% < 100.0% (failed: all_components) \u2014 72/307 components failed (72 critical)", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "ibm-granite/granite-4.0-tiny-preview", + "architecture_id": "GraniteMoeHybridForCausalLM", + "verified_date": "2026-04-15", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Below threshold: P1=50.0% < 100.0% (failed: all_components) \u2014 72/347 components failed (72 critical)", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "ibm-granite/granite-4.0-350m", + "architecture_id": "GraniteMoeHybridForCausalLM", + "verified_date": "2026-04-15", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed with issues: P2=91.7% (failed: generation)", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "ibm-granite/granite-4.0-h-1b", + "architecture_id": "GraniteMoeHybridForCausalLM", + "verified_date": "2026-04-15", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Below threshold: P1=50.0% < 100.0% (failed: all_components) \u2014 72/307 components failed (72 critical)", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "ibm-granite/granite-4.0-h-350m", + "architecture_id": "GraniteMoeHybridForCausalLM", + "verified_date": "2026-04-15", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Below threshold: P1=50.0% < 100.0% (failed: all_components) \u2014 56/243 components failed (56 critical)", + "invalidated": false, + "invalidation_reason": null + }, 
+ { + "model_id": "ibm-granite/granite-4.0-1b", + "architecture_id": "GraniteMoeHybridForCausalLM", + "verified_date": "2026-04-15", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "tiny-random/qwen3-next-moe", + "architecture_id": "Qwen3NextForCausalLM", + "verified_date": "2026-04-15", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "optimum-intel-internal-testing/tiny-random-qwen3-next", + "architecture_id": "Qwen3NextForCausalLM", + "verified_date": "2026-04-15", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "yujiepan/qwen3-next-moe-tiny-random", + "architecture_id": "Qwen3NextForCausalLM", + "verified_date": "2026-04-15", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "Qwen/Qwen3.5-0.8B", + "architecture_id": "Qwen3_5ForCausalLM", + "verified_date": "2026-04-15", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Below threshold: P1=50.0% < 100.0% (failed: all_components) \u2014 18/142 components failed (18 critical)", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "Qwen/Qwen3.5-0.8B", + "architecture_id": "Qwen3_5ForCausalLM", + "verified_date": "2026-04-15", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null } ] }