diff --git a/README.md b/README.md
index 829f1134..ee3708ec 100644
--- a/README.md
+++ b/README.md
@@ -129,7 +129,7 @@ supported on Twinkle✨ framework.
 > For serverless training service accessed via `base_url=https://www.modelscope.cn/twinkle`, it
 > is currently provided via the Tinker-compatible APIs. We will be rolling out services that support
 > both Tinker APIs, as well as the full-fledged Twinkle✨ native APIs. The serverless endpoint is backed
-> by one training base at a time, and currently it is [Qwen3-30B-A3B-Instruct-2507](https://modelscope.cn/models/Qwen/Qwen3-30B-A3B-Instruct-2507).
+> by one training base at a time, and currently it is [Qwen3.5-4B](https://modelscope.cn/models/Qwen/Qwen3.5-4B).
 
 | Model Type          | Model ID on [ModelScope](https://modelscope.cn)                                                                 |               Model Size                | Requires             | Support Megatron |                                                HF Model ID                                                |
 |---------------------|-----------------------------------------------------------------------------------------------------------------|:---------------------------------------:|----------------------|:----------------:|:---------------------------------------------------------------------------------------------------------:|
@@ -234,7 +234,7 @@ from twinkle.dataset import Dataset, DatasetMeta
 from twinkle.preprocessor import SelfCognitionProcessor
 from twinkle.server.common import input_feature_to_datum
 
-base_model = 'ms://Qwen/Qwen3-30B-A3B-Instruct-2507'
+base_model = 'ms://Qwen/Qwen3.5-4B'
 base_url='your-base-url'
 api_key='your-api-key'
 
diff --git a/README_ZH.md b/README_ZH.md
index 1fc8267a..bdab0e41 100644
--- a/README_ZH.md
+++ b/README_ZH.md
@@ -112,7 +112,7 @@ Twinkle✨支持相同的算法接口运行在单GPU、torchrun多机、Ray、Cl
 随着新模型的发布，我们将添加对更多模型的支持。下表列出了 Twinkle✨ 框架当前支持的模型。
 
 >[!Note]
-> 通过 `base_url=https://www.modelscope.cn/twinkle` 访问的无服务器训练服务，目前是通过兼容Tinker的API提供的。我们将陆续推出同时支持Tinker API和完整Twinkle✨原生 API的服务。无服务器端点每次由一个训练基座支持，目前使用的是[Qwen3-30B-A3B-Instruct-2507](https://modelscope.cn/models/Qwen/Qwen3-30B-A3B-Instruct-2507)。
+> 通过 `base_url=https://www.modelscope.cn/twinkle` 访问的无服务器训练服务，目前是通过兼容Tinker的API提供的。我们将陆续推出同时支持Tinker API和完整Twinkle✨原生 API的服务。无服务器端点每次由一个训练基座支持，目前使用的是[Qwen3.5-4B](https://modelscope.cn/models/Qwen/Qwen3.5-4B)。
 
 | Model Type          | Model ID 举例                                                                                                     |               Model Size                | Requires             | Support Megatron |                                                HF Model ID                                                |
 |---------------------|-----------------------------------------------------------------------------------------------------------------|:---------------------------------------:|----------------------|:----------------:|:---------------------------------------------------------------------------------------------------------:|
@@ -216,7 +216,7 @@ from twinkle.dataset import Dataset, DatasetMeta
 from twinkle.preprocessor import SelfCognitionProcessor
 from twinkle.server.common import input_feature_to_datum
 
-base_model = 'ms://Qwen/Qwen3-30B-A3B-Instruct-2507'
+base_model = 'ms://Qwen/Qwen3.5-4B'
 base_url='your-base-url'
 api_key='your-api-key'
 
diff --git a/cookbook/client/server/megatron/server_config.yaml b/cookbook/client/server/megatron/server_config.yaml
index becda8b0..6b5cce0e 100644
--- a/cookbook/client/server/megatron/server_config.yaml
+++ b/cookbook/client/server/megatron/server_config.yaml
@@ -36,11 +36,11 @@ applications:
 
   # 3. Sampler Service - Runs inference / sampling using vLLM engine
   #    Used for generating text from the model (e.g., evaluating LoRA results).
-  - name: sampler-Qwen3-30B-A3B-Instruct-2507
-    route_prefix: /api/v1/sampler/Qwen/Qwen3-30B-A3B-Instruct-2507
+  - name: sampler-Qwen3.5-4B
+    route_prefix: /api/v1/sampler/Qwen/Qwen3.5-4B
     import_path: sampler
     args:
-      model_id: "ms://Qwen/Qwen3-30B-A3B-Instruct-2507"   # ModelScope model identifier
+      model_id: "ms://Qwen/Qwen3.5-4B"   # ModelScope model identifier
       nproc_per_node: 4               # Number of GPU processes per node
       sampler_type: vllm              # Inference engine: 'vllm' (fast) or 'torch' (TorchSampler)
       engine_args:                    # vLLM engine-specific settings
@@ -73,12 +73,12 @@ applications:
 
   # 2. Model Service (commented out) - Would host the base model for training.
   #    Uncomment and configure if you need a training model worker.
-  - name: models-Qwen3-30B-A3B-Instruct-2507
-    route_prefix: /api/v1/model/Qwen/Qwen3-30B-A3B-Instruct-2507
+  - name: models-Qwen3.5-4B
+    route_prefix: /api/v1/model/Qwen/Qwen3.5-4B
     import_path: model
     args:
       use_megatron: true                          # Use HuggingFace Transformers backend
-      model_id: "ms://Qwen/Qwen3-30B-A3B-Instruct-2507" # ModelScope model identifier
+      model_id: "ms://Qwen/Qwen3.5-4B" # ModelScope model identifier
       max_length: 16000                           # model max length
       max_loras: 5                                # model max loras
       nproc_per_node: 4                           # Number of GPU processes per node
diff --git a/cookbook/client/server/megatron/server_config_4b.yaml b/cookbook/client/server/megatron/server_config_4b.yaml
index 0ea99551..e191b981 100644
--- a/cookbook/client/server/megatron/server_config_4b.yaml
+++ b/cookbook/client/server/megatron/server_config_4b.yaml
@@ -39,6 +39,7 @@ applications:
     import_path: model
     args:
       use_megatron: true
+      model_cls: Qwen3_5ForConditionalGeneration
       model_id: "ms://Qwen/Qwen3.5-4B" # ModelScope model identifier
       max_length: 10240
       nproc_per_node: 2                            # Number of GPU processes per node
diff --git a/cookbook/client/tinker/modelscope/sample.py b/cookbook/client/tinker/modelscope/sample.py
index 34a0064c..137d1b2b 100644
--- a/cookbook/client/tinker/modelscope/sample.py
+++ b/cookbook/client/tinker/modelscope/sample.py
@@ -16,7 +16,7 @@
 
 from tinker import ServiceClient
 
-base_model = 'Qwen/Qwen3-30B-A3B-Instruct-2507'
+base_model = 'Qwen/Qwen3.5-4B'
 base_url = 'http://www.modelscope.cn/twinkle'
 
 # Step 2: Define the base model and connect to the server
@@ -29,7 +29,7 @@
 # The model_path is a twinkle:// URI pointing to a previously saved LoRA checkpoint.
 # The server will load the base model and apply the LoRA adapter weights.
 sampling_client = service_client.create_sampling_client(
-    model_path='twinkle://xxx-Qwen_Qwen3-30B-A3B-Instruct-2507-xxx/weights/twinkle-lora-1',
+    model_path='twinkle://xxx-Qwen_Qwen3.5-4B-xxx/weights/twinkle-lora-1',
     base_model=base_model
 )
 
diff --git a/cookbook/client/tinker/modelscope/self_cognition.py b/cookbook/client/tinker/modelscope/self_cognition.py
index cb3b1700..1d653fb1 100644
--- a/cookbook/client/tinker/modelscope/self_cognition.py
+++ b/cookbook/client/tinker/modelscope/self_cognition.py
@@ -23,7 +23,7 @@
 from tinker import ServiceClient
 
 # The base model to fine-tune / evaluate
-base_model = 'Qwen/Qwen3-30B-A3B-Instruct-2507'
+base_model = 'Qwen/Qwen3.5-4B'
 base_url = 'http://www.modelscope.cn/twinkle'
 
 
diff --git a/cookbook/client/tinker/modelscope/short_math_grpo.py b/cookbook/client/tinker/modelscope/short_math_grpo.py
index 225f8219..424d460d 100644
--- a/cookbook/client/tinker/modelscope/short_math_grpo.py
+++ b/cookbook/client/tinker/modelscope/short_math_grpo.py
@@ -38,7 +38,7 @@
 logger = get_logger()
 
 # ========== Configuration ==========
-BASE_MODEL = 'Qwen/Qwen3-30B-A3B-Instruct-2507'
+BASE_MODEL = 'Qwen/Qwen3.5-4B'
 NUM_GENERATIONS = 8
 MAX_NEW_TOKENS = 4096
 LEARNING_RATE = 1e-4
diff --git a/cookbook/client/tinker/self_host/sample.py b/cookbook/client/tinker/self_host/sample.py
index 132eb63a..8b0be8ce 100644
--- a/cookbook/client/tinker/self_host/sample.py
+++ b/cookbook/client/tinker/self_host/sample.py
@@ -27,7 +27,7 @@
 # The model_path is a twinkle:// URI pointing to a previously saved LoRA checkpoint.
 # The server will load the base model and apply the LoRA adapter weights.
 sampling_client = service_client.create_sampling_client(
-    model_path='twinkle://xxx-Qwen_Qwen3-30B-A3B-Instruct-2507-xxx/weights/twinkle-lora-1',
+    model_path='twinkle://xxx-Qwen_Qwen3.5-4B-xxx/weights/twinkle-lora-1',
     base_model=base_model
 )
 
diff --git a/cookbook/client/twinkle/modelscope/multi_modal.py b/cookbook/client/twinkle/modelscope/multi_modal.py
new file mode 100644
index 00000000..f3b8cd24
--- /dev/null
+++ b/cookbook/client/twinkle/modelscope/multi_modal.py
@@ -0,0 +1,168 @@
+# Twinkle Client - Multi-modal LoRA Training Example
+#
+# This script demonstrates how to fine-tune a multi-modal model on an
+# image-to-LaTeX OCR dataset using LoRA (Low-Rank Adaptation) through the
+# Twinkle client-server architecture. The server at `base_url` must be reachable.
+
+# Step 1: Load environment variables from a .env file (e.g., API tokens)
+import dotenv
+import os
+from twinkle.data_format import Trajectory, Message
+from twinkle.preprocessor import Preprocessor
+
+dotenv.load_dotenv('.env')
+import numpy as np
+import torch
+from peft import LoraConfig
+
+from twinkle import get_logger
+from twinkle.dataset import DatasetMeta
+from twinkle_client import init_twinkle_client
+from twinkle.dataloader import DataLoader
+from twinkle.dataset import LazyDataset
+from twinkle_client.model import MultiLoraTransformersModel
+
+logger = get_logger()
+
+base_model = 'Qwen/Qwen3.5-4B'  # prefixed with 'ms://' where it is passed to the template and model in train()
+base_url = 'http://www.modelscope.cn/twinkle'  # NOTE(review): README uses https:// for this host; confirm http
+
+# Step 2: Initialize the Twinkle client to communicate with the remote server.
+# - base_url: the address of the running Twinkle server
+# - api_key: authentication token (read from the MODELSCOPE_TOKEN environment variable; None if unset)
+client = init_twinkle_client(base_url=base_url, api_key=os.environ.get('MODELSCOPE_TOKEN'))
+
+# Step 3: Query the server for existing training runs and log them with their checkpoints.
+# This is useful for picking a checkpoint to resume a previous training session from.
+runs = client.list_training_runs()
+
+resume_path = None  # stays None (no resume in train()) unless assigned in the loop below
+for run in runs:
+    logger.info(run.model_dump_json(indent=2))
+    # List all saved checkpoints for this training run
+    checkpoints = client.list_checkpoints(run.training_run_id)
+
+    for checkpoint in checkpoints:
+        logger.info(checkpoint.model_dump_json(indent=2))
+        # Uncomment the line below to resume from a specific checkpoint:
+        # resume_path = checkpoint.twinkle_path
+
+
+class LatexOCRProcessor(Preprocessor):  # turns dataset rows into image -> LaTeX chat trajectories
+
+    def __call__(self, rows):
+        rows = self.map_col_to_row(rows)  # presumably column-wise batch -> list of row dicts; confirm
+        rows = [self.preprocess(row) for row in rows]
+        rows = self.map_row_to_col(rows)  # presumably the inverse conversion; confirm
+        return rows
+
+    def preprocess(self, row) -> Trajectory:  # reads row['image'] and row['text']
+        return Trajectory(
+            messages=[
+                Message(role='user', content='<image>Using LaTeX to perform OCR on the image.', images=[row['image']]),
+                Message(role='assistant', content=row['text']),  # row['text'] is used as the training target
+            ]
+        )
+
+
+def train():
+    # Step 4: Prepare the dataset
+
+    # Load a 500-sample slice of the LaTeX_OCR dataset from ModelScope
+    dataset = LazyDataset(dataset_meta=DatasetMeta('ms://AI-ModelScope/LaTeX_OCR', data_slice=range(500)))
+
+    # Apply a chat template so the data matches the model's expected input format
+    dataset.set_template('Qwen3_5Template', model_id=f'ms://{base_model}', max_length=512)
+
+    # Convert each row into a user (image + OCR prompt) / assistant (row text) trajectory
+    dataset.map(LatexOCRProcessor)
+
+    # Tokenize and encode the dataset into model-ready input features
+    dataset.encode(batched=True)
+
+    # Wrap the dataset into a DataLoader that yields batches of size 4
+    dataloader = DataLoader(dataset=dataset, batch_size=4)
+
+    # Step 5: Configure the model
+
+    # Create a multi-LoRA Transformers model pointing to the base model on ModelScope
+    model = MultiLoraTransformersModel(model_id=f'ms://{base_model}')
+
+    # Define LoRA configuration: apply low-rank adapters to all linear layers
+    lora_config = LoraConfig(target_modules='all-linear')
+
+    # Attach the LoRA adapter named 'default' to the model.
+    # gradient_accumulation_steps=2 means gradients are accumulated over 2 micro-batches
+    # before an optimizer step, effectively doubling the batch size.
+    model.add_adapter_to_model('default', lora_config, gradient_accumulation_steps=2)
+
+    # Set the same chat template used during data preprocessing
+    model.set_template('Qwen3_5Template')
+
+    # Set the input processor (pads sequences on the right side)
+    model.set_processor('InputProcessor', padding_side='right')
+
+    # Use cross-entropy loss for language modeling
+    model.set_loss('CrossEntropyLoss')
+
+    # Use Adam optimizer with a learning rate of 1e-4 (only Adam is supported when the server uses Megatron)
+    model.set_optimizer('Adam', lr=1e-4)
+
+    # Use a linear learning rate scheduler (LR schedulers are not supported when the server uses Megatron)
+    # model.set_lr_scheduler('LinearLR')
+
+    # Step 6: Optionally resume from a previous checkpoint (resume_path is set at module level)
+    if resume_path:
+        logger.info(f'Resuming training from {resume_path}')
+        model.load(resume_path, load_optimizer=True)
+
+    # Step 7: Log the training configuration, then run the training loop for 3 epochs
+    logger.info(model.get_train_configs().model_dump())
+
+    for epoch in range(3):
+        logger.info(f'Starting epoch {epoch}')
+        for step, batch in enumerate(dataloader):
+            for sample in batch:  # turn ndarray/tensor fields into plain lists before sending the batch
+                for key in sample:
+                    if isinstance(sample[key], np.ndarray):
+                        sample[key] = sample[key].tolist()
+                    elif isinstance(sample[key], torch.Tensor):
+                        sample[key] = sample[key].cpu().numpy().tolist()
+            # Forward pass + backward pass (computes gradients)
+            model.forward_backward(inputs=batch)
+
+            # Clip gradients and step the optimizer in a single call
+            model.clip_grad_and_step()
+            # Equivalent to the following steps:
+            # # Clip gradients to prevent exploding gradients (max norm = 1.0)
+            # model.clip_grad_norm(1.0)
+            # # Perform one optimizer step (update model weights)
+            # model.step()
+            # # Reset gradients to zero for the next iteration
+            # model.zero_grad()
+            # # Advance the learning rate scheduler by one step
+            # model.lr_step()
+
+            # Log the training metric on every even step (matches gradient_accumulation_steps=2)
+            if step % 2 == 0:
+                # Fetch the current training metric from the server and log it
+                metric = model.calculate_metric(is_training=True)
+                logger.info(f'Current is step {step} of {len(dataloader)}, metric: {metric.result}')
+
+        # Step 8: Save a checkpoint (including optimizer state) at the end of every epoch
+        twinkle_path = model.save(name=f'twinkle-epoch-{epoch}', save_optimizer=True)
+        logger.info(f'Saved checkpoint: {twinkle_path}')
+
+    # Step 9 (optional): Upload the last saved checkpoint to ModelScope Hub
+    # YOUR_USER_NAME = "your_username"
+    # hub_model_id = f'{YOUR_USER_NAME}/twinkle-multi-modal'
+    # model.upload_to_hub(
+    #     checkpoint_dir=twinkle_path,
+    #     hub_model_id=hub_model_id,
+    #     async_upload=False
+    # )
+    # logger.info(f"Uploaded checkpoint to hub: {hub_model_id}")
+
+
+if __name__ == '__main__':
+    train()
diff --git a/cookbook/client/twinkle/modelscope/self_congnition.py b/cookbook/client/twinkle/modelscope/self_congnition.py
index 25c8a0e4..ed44b4b1 100644
--- a/cookbook/client/twinkle/modelscope/self_congnition.py
+++ b/cookbook/client/twinkle/modelscope/self_congnition.py
@@ -21,7 +21,7 @@
 
 logger = get_logger()
 
-base_model = 'Qwen/Qwen3-30B-A3B-Instruct-2507'
+base_model = 'Qwen/Qwen3.5-4B'
 base_url = 'http://www.modelscope.cn/twinkle'
 
 # Step 2: Initialize the Twinkle client to communicate with the remote server.
diff --git a/cookbook/transformers/ep_fsdp_qwen3_moe.py b/cookbook/transformers/ep_fsdp_qwen3_moe.py
index 673e64fd..3c02b218 100644
--- a/cookbook/transformers/ep_fsdp_qwen3_moe.py
+++ b/cookbook/transformers/ep_fsdp_qwen3_moe.py
@@ -11,7 +11,7 @@
 
 logger = get_logger()
 
-MODEL_ID = os.environ.get('QWEN3_MODEL_ID', 'ms://Qwen/Qwen3-30B-A3B-Instruct-2507')
+MODEL_ID = os.environ.get('QWEN3_MODEL_ID', 'ms://Qwen/Qwen3.5-4B')
 DATASET_ID = os.environ.get('DATASET_ID', 'ms://swift/self-cognition')
 TEMPLATE_ID = os.environ.get('TEMPLATE_ID', 'Template')
 _num_layers_env = os.environ.get('NUM_LAYERS')
diff --git a/cookbook/transformers/fsdp2_moe.py b/cookbook/transformers/fsdp2_moe.py
index 3ea649d3..2a92794a 100644
--- a/cookbook/transformers/fsdp2_moe.py
+++ b/cookbook/transformers/fsdp2_moe.py
@@ -20,7 +20,7 @@
 def eval(model):
     # 100 Samples
     dataset = Dataset(dataset_meta=DatasetMeta('ms://swift/self-cognition', data_slice=range(100)))
-    dataset.set_template('Template', model_id='ms://Qwen/Qwen3-30B-A3B-Instruct-2507')
+    dataset.set_template('Template', model_id='ms://Qwen/Qwen3.5-4B')
     dataset.map(SelfCognitionProcessor('twinkle大模型', 'ModelScope社区'))
     dataset.encode()
     dataloader = DataLoader(dataset=dataset, batch_size=4)
@@ -35,7 +35,7 @@ def train():
     # 1000 samples
     dataset = Dataset(dataset_meta=DatasetMeta('ms://swift/self-cognition', data_slice=range(1000)))
     # Set template to prepare encoding
-    dataset.set_template('Template', model_id='ms://Qwen/Qwen3-30B-A3B-Instruct-2507')
+    dataset.set_template('Template', model_id='ms://Qwen/Qwen3.5-4B')
     # Preprocess the dataset to standard format
     dataset.map(SelfCognitionProcessor('twinkle大模型', 'ModelScope社区'))
     # Encode dataset
@@ -43,7 +43,7 @@ def train():
     # Global batch size = 4, for GPUs, so 1 sample per GPU
     dataloader = DataLoader(dataset=dataset, batch_size=8)
     # Use a TransformersModel, transformer_cls_names_to_wrap=Qwen3MoeSparseMoeBlock to avoid hang of fsdp2
-    model = TransformersModel(model_id='ms://Qwen/Qwen3-30B-A3B-Instruct-2507', fsdp_config={'transformer_cls_names_to_wrap':['Qwen3MoeSparseMoeBlock']})
+    model = TransformersModel(model_id='ms://Qwen/Qwen3.5-4B', fsdp_config={'transformer_cls_names_to_wrap':['Qwen3MoeSparseMoeBlock']})
     # Patch MoE model to fix the hang bug, support transformers==4.*
     model.apply_patch('ms://twinkle-kit/qwen3_moe_transformers4_patch')
     lora_config = LoraConfig(
diff --git a/docs/source_en/Usage Guide/Introduction-with-Qwen3.5.md b/docs/source_en/Usage Guide/Introduction-with-Qwen3.5.md
index 978b5af1..d1eba8cc 100644
--- a/docs/source_en/Usage Guide/Introduction-with-Qwen3.5.md	
+++ b/docs/source_en/Usage Guide/Introduction-with-Qwen3.5.md	
@@ -530,7 +530,7 @@ Alongside the open-source release of Twinkle, ModelScope provides a hosted model
 
 ```python
 base_url = 'https://www.modelscope.cn/twinkle'
-base_model = 'Qwen/Qwen3-30B-A3B-Instruct-2507'  # Model currently deployed in the official environment
+base_model = 'Qwen/Qwen3.5-4B'  # Model currently deployed in the official environment
 ```
 
 ---
diff --git a/docs/source_en/Usage Guide/Train-as-a-Service.md b/docs/source_en/Usage Guide/Train-as-a-Service.md
index 1fdcb4a4..f5571773 100644
--- a/docs/source_en/Usage Guide/Train-as-a-Service.md	
+++ b/docs/source_en/Usage Guide/Train-as-a-Service.md	
@@ -2,7 +2,7 @@
 
 Alongside the open-source release of the Twinkle framework, we also provide a hosted model training service (Training as a Service) powered by ModelScope's backend infrastructure. Developers can use this service to experience Twinkle's training API for free.
 
-The model currently running on the cluster is [Qwen/Qwen3-30B-A3B-Instruct-2507](https://www.modelscope.cn/models/Qwen/Qwen3-30B-A3B-Instruct-2507). Below are the detailed usage instructions:
+The model currently running on the cluster is [Qwen/Qwen3.5-4B](https://www.modelscope.cn/models/Qwen/Qwen3.5-4B). Below are the detailed usage instructions:
 
 ## Step 1. Register a ModelScope Account and Apply to Join the twinkle-explorers Organization
 
@@ -30,7 +30,7 @@ from twinkle.dataset import Dataset, DatasetMeta
 from twinkle.preprocessor import SelfCognitionProcessor
 from twinkle.server.common import input_feature_to_datum
 
-base_model = 'ms://Qwen/Qwen3-30B-A3B-Instruct-2507'
+base_model = 'ms://Qwen/Qwen3.5-4B'
 base_url='http://www.modelscope.cn/twinkle'
 api_key=os.environ.get('MODELSCOPE_TOKEN')
 
@@ -64,7 +64,7 @@ for epoch in range(2):
     print(f'Saved checkpoint for epoch {epoch} to {result.path}')
 ```
 
-With the code above, you can train a self-cognition LoRA based on `Qwen/Qwen3-30B-A3B-Instruct-2507`. This LoRA will change the model's name and creator to the names specified during training. To perform inference using this LoRA:
+With the code above, you can train a self-cognition LoRA based on `Qwen/Qwen3.5-4B`. This LoRA will change the model's name and creator to the names specified during training. To perform inference using this LoRA:
 
 ```python
 import os
@@ -79,7 +79,7 @@ init_tinker_client()
 
 from tinker import ServiceClient
 
-base_model = 'Qwen/Qwen3-30B-A3B-Instruct-2507'
+base_model = 'Qwen/Qwen3.5-4B'
 base_url = 'http://www.modelscope.cn/twinkle'
 
 # Step 2: Define the base model and connect to the server
@@ -92,7 +92,7 @@ service_client = ServiceClient(
 # The model_path is a twinkle:// URI pointing to a previously saved LoRA checkpoint.
 # The server will load the base model and apply the LoRA adapter weights.
 sampling_client = service_client.create_sampling_client(
-    model_path='twinkle://xxx-Qwen_Qwen3-30B-A3B-Instruct-2507-xxx/weights/twinkle-lora-1',
+    model_path='twinkle://xxx-Qwen_Qwen3.5-4B-xxx/weights/twinkle-lora-1',
     base_model=base_model
 )
 
diff --git "a/docs/source_zh/\344\275\277\347\224\250\346\214\207\345\274\225/Qwen3.5\346\234\200\344\275\263\345\256\236\350\267\265.md" "b/docs/source_zh/\344\275\277\347\224\250\346\214\207\345\274\225/Qwen3.5\346\234\200\344\275\263\345\256\236\350\267\265.md"
index c8e92c3b..ad78e28d 100644
--- "a/docs/source_zh/\344\275\277\347\224\250\346\214\207\345\274\225/Qwen3.5\346\234\200\344\275\263\345\256\236\350\267\265.md"
+++ "b/docs/source_zh/\344\275\277\347\224\250\346\214\207\345\274\225/Qwen3.5\346\234\200\344\275\263\345\256\236\350\267\265.md"
@@ -530,7 +530,7 @@ Twinkle 框架开源的同时，魔搭社区依托自身算力基础设施，提
 
 ```python
 base_url = 'https://www.modelscope.cn/twinkle'
-base_model = 'Qwen/Qwen3-30B-A3B-Instruct-2507'  # 官方环境当前部署的模型
+base_model = 'Qwen/Qwen3.5-4B'  # 官方环境当前部署的模型
 ```
 
 ---
diff --git "a/docs/source_zh/\344\275\277\347\224\250\346\214\207\345\274\225/\350\256\255\347\273\203\346\234\215\345\212\241.md" "b/docs/source_zh/\344\275\277\347\224\250\346\214\207\345\274\225/\350\256\255\347\273\203\346\234\215\345\212\241.md"
index b07c728b..cfff81e3 100644
--- "a/docs/source_zh/\344\275\277\347\224\250\346\214\207\345\274\225/\350\256\255\347\273\203\346\234\215\345\212\241.md"
+++ "b/docs/source_zh/\344\275\277\347\224\250\346\214\207\345\274\225/\350\256\255\347\273\203\346\234\215\345\212\241.md"
@@ -3,7 +3,7 @@
 在 Twinkle 框架开源的同时，我们依托ModelScope的后台服务，也提供了托管的模型训练服务(Training as a Service），开发者可以通过这一服务，
 免费体验Twinkle的训练API。
 
-目前在集群中运行的模型是[Qwen/Qwen3-30B-A3B-Instruct-2507](https://www.modelscope.cn/models/Qwen/Qwen3-30B-A3B-Instruct-2507)。下面介绍具体的使用方法：
+目前在集群中运行的模型是[Qwen/Qwen3.5-4B](https://www.modelscope.cn/models/Qwen/Qwen3.5-4B)。下面介绍具体的使用方法：
 
 ## Step 1. 注册ModelScope用户并申请加入 twinkle-explorers 组织
 
@@ -33,7 +33,7 @@ from twinkle.dataset import Dataset, DatasetMeta
 from twinkle.preprocessor import SelfCognitionProcessor
 from twinkle.server.common import input_feature_to_datum
 
-base_model = 'ms://Qwen/Qwen3-30B-A3B-Instruct-2507'
+base_model = 'ms://Qwen/Qwen3.5-4B'
 base_url='http://www.modelscope.cn/twinkle'
 api_key=os.environ.get('MODELSCOPE_TOKEN')
 
@@ -67,7 +67,7 @@ for epoch in range(2):
     print(f'Saved checkpoint for epoch {epoch} to {result.path}')
 ```
 
-通过上述代码，你可以训练一个原模型为`Qwen/Qwen3-30B-A3B-Instruct-2507`的自我认知lora。这个lora会改变模型的名称和制造者为训练时指定的名称。使用这个lora进行推理：
+通过上述代码，你可以训练一个原模型为`Qwen/Qwen3.5-4B`的自我认知lora。这个lora会改变模型的名称和制造者为训练时指定的名称。使用这个lora进行推理：
 
 ```python
 import os
@@ -82,7 +82,7 @@ init_tinker_client()
 
 from tinker import ServiceClient
 
-base_model = 'Qwen/Qwen3-30B-A3B-Instruct-2507'
+base_model = 'Qwen/Qwen3.5-4B'
 base_url = 'http://www.modelscope.cn/twinkle'
 
 # Step 2: Define the base model and connect to the server
@@ -95,7 +95,7 @@ service_client = ServiceClient(
 # The model_path is a twinkle:// URI pointing to a previously saved LoRA checkpoint.
 # The server will load the base model and apply the LoRA adapter weights.
 sampling_client = service_client.create_sampling_client(
-    model_path='twinkle://xxx-Qwen_Qwen3-30B-A3B-Instruct-2507-xxx/weights/twinkle-lora-1',
+    model_path='twinkle://xxx-Qwen_Qwen3.5-4B-xxx/weights/twinkle-lora-1',
     base_model=base_model
 )
 
diff --git a/src/twinkle/dataloader/device_mesh_fetcher.py b/src/twinkle/dataloader/device_mesh_fetcher.py
index bd89285f..9560fa0f 100644
--- a/src/twinkle/dataloader/device_mesh_fetcher.py
+++ b/src/twinkle/dataloader/device_mesh_fetcher.py
@@ -61,6 +61,8 @@ def fetch(self, _):
                         except StopIteration as e:
                             raise e
                         except Exception:  # noqa
+                            import traceback
+                            traceback.print_exc()
                             continue
                         else:
                             break
diff --git a/src/twinkle/dataloader/retry_sampler.py b/src/twinkle/dataloader/retry_sampler.py
index 3e731bae..62f05660 100644
--- a/src/twinkle/dataloader/retry_sampler.py
+++ b/src/twinkle/dataloader/retry_sampler.py
@@ -33,6 +33,8 @@ def __iter__(self):
                     total += 1
                     break
                 except Exception:  # noqa
+                    import traceback
+                    traceback.print_exc()
                     continue
             else:
                 raise StopIteration(f'Max retries exceeded: {self.max_retries}, no valid data found.')
@@ -53,6 +55,8 @@ def __iter__(self):
                     yield idx
                     total += 1
                 except Exception:  # noqa
+                    import traceback
+                    traceback.print_exc()
                     continue
             else:
                 raise ValueError(f'Max retries exceeded: {self.max_retries}, no valid data found.')
diff --git a/src/twinkle/model/transformers/multi_lora_transformers.py b/src/twinkle/model/transformers/multi_lora_transformers.py
index 6033d943..0900f52b 100644
--- a/src/twinkle/model/transformers/multi_lora_transformers.py
+++ b/src/twinkle/model/transformers/multi_lora_transformers.py
@@ -1,5 +1,6 @@
 # Copyright (c) ModelScope Contributors. All rights reserved.
 import os
+import transformers
 from peft import LoraConfig, PeftConfig, PeftModel, load_peft_weights
 from torch.optim import Optimizer
 from torch.optim.lr_scheduler import LRScheduler
@@ -36,6 +37,8 @@ def __init__(
         os.environ['TOKENIZERS_PARALLELISM'] = 'true'
         super(PreTrainedModel, self).__init__()
         model_id = HubOperation.download_model(model_id)
+        if isinstance(model_cls, str):
+            model_cls = getattr(transformers, model_cls)
         self.model = model_cls.from_pretrained(model_id, config=config, **kwargs)
         self.model_id = model_id
         self.tokenizer_id = kwargs.get('tokenizer_id', self.model_id)
diff --git a/src/twinkle/processor/base.py b/src/twinkle/processor/base.py
index 03493689..576db8cd 100644
--- a/src/twinkle/processor/base.py
+++ b/src/twinkle/processor/base.py
@@ -35,6 +35,7 @@ class InputProcessor:
         'video_grid_thw': 0,
         'input_features': 0.0,
         'feature_attention_mask': 0,
+        'mm_token_type_ids': 0,
     }
 
     # VLM fields to concatenate (not pad) in batch
@@ -370,7 +371,6 @@ def _collate_macro_batch(self, inputs: List[InputFeature]) -> InputFeature:
                 else:
                     result[key] = values
             result = InputFeature(**result)
-
         for field, values in vlm_fields.items():
             if values:
                 if values[0].dim() == 1:
diff --git a/src/twinkle/server/gateway/server.py b/src/twinkle/server/gateway/server.py
index dd591ccf..bf55efda 100644
--- a/src/twinkle/server/gateway/server.py
+++ b/src/twinkle/server/gateway/server.py
@@ -36,7 +36,7 @@ def __init__(self,
         self.http_options = http_options or {}
         self.proxy = ServiceProxy(http_options=http_options, route_prefix=self.route_prefix)
         self.supported_models = self._normalize_models(supported_models) or [
-            tinker_types.SupportedModel(model_name='Qwen/Qwen3-30B-A3B-Instruct-2507'),
+            tinker_types.SupportedModel(model_name='Qwen/Qwen3.5-4B'),
         ]
         self._modelscope_config_lock = asyncio.Lock()
 
diff --git a/src/twinkle/server/model/twinkle_handlers.py b/src/twinkle/server/model/twinkle_handlers.py
index 35c87441..416a4a0d 100644
--- a/src/twinkle/server/model/twinkle_handlers.py
+++ b/src/twinkle/server/model/twinkle_handlers.py
@@ -8,10 +8,11 @@
 """
 from __future__ import annotations
 
+import torch
 import traceback
 from fastapi import Depends, FastAPI, HTTPException, Request
 from peft import LoraConfig
-from typing import TYPE_CHECKING, Any, Callable, Optional
+from typing import TYPE_CHECKING, Any, Callable
 
 if TYPE_CHECKING:
     from .app import ModelManagement
@@ -137,11 +138,22 @@ async def forward_backward(
     ) -> types.ForwardBackwardResponse:
         adapter_name = _get_twinkle_adapter_name(request, body.adapter_name)
 
+        def first_element(data):  # first leaf of a (possibly nested) list, following index 0 only
+            while isinstance(data, list):
+                if len(data) == 0:
+                    return None  # an empty list on the way down has no leaf
+                data = data[0]
+            return data  # non-list values are returned unchanged
+
         async def _task():
             self.assert_adapter_exists(adapter_name=adapter_name)
             extra_kwargs = body.model_extra or {}
-            inputs = _parse_inputs(body.inputs)
-            ret = self.model.forward_backward(inputs=inputs, adapter_name=adapter_name, **extra_kwargs)
+            all_inputs = _parse_inputs(body.inputs)
+            for inputs in all_inputs:  # each parsed input is updated in place
+                for key in inputs:  # lists whose first leaf is int/float -> torch.Tensor; other values untouched
+                    if isinstance(inputs[key], list) and isinstance(first_element(inputs[key]), (int, float)):
+                        inputs[key] = torch.tensor(inputs[key])
+            ret = self.model.forward_backward(inputs=all_inputs, adapter_name=adapter_name, **extra_kwargs)
             return {'result': ret}
 
         return await run_task(self.schedule_task_and_wait(_task, task_type='forward_backward'))
diff --git a/tests/moe/test_ep_fsdp_vs_single.py b/tests/moe/test_ep_fsdp_vs_single.py
index d365c2be..ce46c3da 100644
--- a/tests/moe/test_ep_fsdp_vs_single.py
+++ b/tests/moe/test_ep_fsdp_vs_single.py
@@ -7,7 +7,7 @@
 
 Requirements:
   - 4 CUDA GPUs
-  - Model weights accessible via QWEN3_MOE_MODEL_ID (default: Qwen/Qwen3-30B-A3B-Instruct-2507)
+  - Model weights accessible via QWEN3_MOE_MODEL_ID (default: Qwen/Qwen3.5-4B)
 
 Launch (requires 4 CUDA GPUs; skipped automatically if fewer GPUs are available):
 
@@ -456,7 +456,7 @@ def test_alignment(self):
         if torch.cuda.device_count() < 4:
             self.skipTest('Need 4 GPUs')
 
-        model_id = os.environ.get('QWEN3_MOE_MODEL_ID', 'Qwen/Qwen3-30B-A3B-Instruct-2507')
+        model_id = os.environ.get('QWEN3_MOE_MODEL_ID', 'Qwen/Qwen3.5-4B')
         local_only = os.environ.get('QWEN3_MOE_LOCAL_ONLY', '1') != '0'
 
         try:
diff --git a/tests/moe/test_expert_parallel_qwen3_fsdp_sp.py b/tests/moe/test_expert_parallel_qwen3_fsdp_sp.py
index a2031d14..b035db3c 100644
--- a/tests/moe/test_expert_parallel_qwen3_fsdp_sp.py
+++ b/tests/moe/test_expert_parallel_qwen3_fsdp_sp.py
@@ -472,7 +472,7 @@ def test_qwen3_moe_pretrained_ep_fsdp_sp_alignment(self):
         world_size = 4
         if torch.cuda.device_count() < world_size:
             self.skipTest('Requires at least 4 GPUs for EP+FSDP+SP alignment test.')
-        model_id = os.environ.get('QWEN3_MOE_MODEL_ID', 'Qwen/Qwen3-30B-A3B-Instruct-2507')
+        model_id = os.environ.get('QWEN3_MOE_MODEL_ID', 'Qwen/Qwen3.5-4B')
         local_files_only = os.environ.get('QWEN3_MOE_LOCAL_ONLY', '1') != '0'
         try:
             _load_qwen3_moe_config(model_id, local_files_only)