Skip to content

File reads/2024-02-04T14:26:47-e0.pth cannot be opened. #4

@Valkyrie1215

Description

@Valkyrie1215

During training, when the model tries to save a checkpoint, the following error occurs:
Traceback (most recent call last):
File "main.py", line 1026, in <module>
trainer.fit(model, data)
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 740, in fit
self._call_and_handle_interrupt(
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 685, in _call_and_handle_interrupt
return trainer_fn(*args, **kwargs)
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 777, in _fit_impl
self._run(model, ckpt_path=ckpt_path)
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 1199, in _run
self._dispatch()
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 1279, in _dispatch
self.training_type_plugin.start_training(self)
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\pytorch_lightning\plugins\training_type\training_type_plugin.py", line 202, in start_training
self._results = trainer.run_stage()
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 1289, in run_stage
return self._run_train()
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 1319, in _run_train
self.fit_loop.run()
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\pytorch_lightning\loops\base.py", line 145, in run
self.advance(*args, **kwargs)
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\pytorch_lightning\loops\fit_loop.py", line 234, in advance
self.epoch_loop.run(data_fetcher)
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\pytorch_lightning\loops\base.py", line 151, in run
output = self.on_run_end()
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\pytorch_lightning\loops\epoch\training_epoch_loop.py", line 298, in on_run_end
self.trainer.call_hook("on_train_epoch_end")
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 1495, in call_hook
callback_fx(*args, **kwargs)
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\pytorch_lightning\trainer\callback_hook.py", line 93, in on_train_epoch_end
callback.on_train_epoch_end(self, self.lightning_module)
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\pytorch_lightning\callbacks\model_checkpoint.py", line 321, in on_train_epoch_end
self.save_checkpoint(trainer)
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\pytorch_lightning\callbacks\model_checkpoint.py", line 400, in save_checkpoint
self._save_last_checkpoint(trainer, monitor_candidates)
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\pytorch_lightning\callbacks\model_checkpoint.py", line 672, in _save_last_checkpoint
trainer.save_checkpoint(filepath, self.save_weights_only)
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 1913, in save_checkpoint
self.checkpoint_connector.save_checkpoint(filepath, weights_only)
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\pytorch_lightning\trainer\connectors\checkpoint_connector.py", line 477, in save_checkpoint
_checkpoint = self.dump_checkpoint(weights_only)
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\pytorch_lightning\trainer\connectors\checkpoint_connector.py", line 416, in dump_checkpoint
model.on_save_checkpoint(checkpoint)
File "D:\RGDiffSR-main\ldm\models\diffusion\ddpm.py", line 586, in on_save_checkpoint
self.cond_stage_model.save_state_dict(
File "D:\RGDiffSR-main\ldm\modules\encoders\tp_generator.py", line 88, in save_state_dict
torch.save(self.crnn_model.state_dict(), os.path.join(path, self.timestamp + f'-e{epoch}.pth'))
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\torch\serialization.py", line 618, in save
with _open_zipfile_writer(f) as opened_zipfile:
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\torch\serialization.py", line 492, in _open_zipfile_writer
return container(name_or_buffer)
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\torch\serialization.py", line 463, in __init__
super().__init__(torch._C.PyTorchFileWriter(self.name))
RuntimeError: File reads/2024-02-04T14:26:47-e0.pth cannot be opened.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions