During training, when the model tries to save a checkpoint, the following error occurs:
Traceback (most recent call last):
File "main.py", line 1026, in <module>
trainer.fit(model, data)
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 740, in fit
self._call_and_handle_interrupt(
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 685, in _call_and_handle_interrupt
return trainer_fn(*args, **kwargs)
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 777, in _fit_impl
self._run(model, ckpt_path=ckpt_path)
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 1199, in _run
self._dispatch()
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 1279, in _dispatch
self.training_type_plugin.start_training(self)
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\pytorch_lightning\plugins\training_type\training_type_plugin.py", line 202, in start_training
self._results = trainer.run_stage()
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 1289, in run_stage
return self._run_train()
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 1319, in _run_train
self.fit_loop.run()
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\pytorch_lightning\loops\base.py", line 145, in run
self.advance(*args, **kwargs)
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\pytorch_lightning\loops\fit_loop.py", line 234, in advance
self.epoch_loop.run(data_fetcher)
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\pytorch_lightning\loops\base.py", line 151, in run
output = self.on_run_end()
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\pytorch_lightning\loops\epoch\training_epoch_loop.py", line 298, in on_run_end
self.trainer.call_hook("on_train_epoch_end")
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 1495, in call_hook
callback_fx(*args, **kwargs)
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\pytorch_lightning\trainer\callback_hook.py", line 93, in on_train_epoch_end
callback.on_train_epoch_end(self, self.lightning_module)
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\pytorch_lightning\callbacks\model_checkpoint.py", line 321, in on_train_epoch_end
self.save_checkpoint(trainer)
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\pytorch_lightning\callbacks\model_checkpoint.py", line 400, in save_checkpoint
self._save_last_checkpoint(trainer, monitor_candidates)
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\pytorch_lightning\callbacks\model_checkpoint.py", line 672, in _save_last_checkpoint
trainer.save_checkpoint(filepath, self.save_weights_only)
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 1913, in save_checkpoint
self.checkpoint_connector.save_checkpoint(filepath, weights_only)
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\pytorch_lightning\trainer\connectors\checkpoint_connector.py", line 477, in save_checkpoint
_checkpoint = self.dump_checkpoint(weights_only)
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\pytorch_lightning\trainer\connectors\checkpoint_connector.py", line 416, in dump_checkpoint
model.on_save_checkpoint(checkpoint)
File "D:\RGDiffSR-main\ldm\models\diffusion\ddpm.py", line 586, in on_save_checkpoint
self.cond_stage_model.save_state_dict(
File "D:\RGDiffSR-main\ldm\modules\encoders\tp_generator.py", line 88, in save_state_dict
torch.save(self.crnn_model.state_dict(), os.path.join(path, self.timestamp + f'-e{epoch}.pth'))
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\torch\serialization.py", line 618, in save
with _open_zipfile_writer(f) as opened_zipfile:
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\torch\serialization.py", line 492, in _open_zipfile_writer
return container(name_or_buffer)
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\torch\serialization.py", line 463, in __init__
super().__init__(torch._C.PyTorchFileWriter(self.name))
RuntimeError: File reads/2024-02-04T14:26:47-e0.pth cannot be opened.
During training, when the model tries to save a checkpoint, the following error occurs:
Traceback (most recent call last):
File "main.py", line 1026, in <module>
trainer.fit(model, data)
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 740, in fit
self._call_and_handle_interrupt(
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 685, in _call_and_handle_interrupt
return trainer_fn(*args, **kwargs)
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 777, in _fit_impl
self._run(model, ckpt_path=ckpt_path)
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 1199, in _run
self._dispatch()
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 1279, in _dispatch
self.training_type_plugin.start_training(self)
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\pytorch_lightning\plugins\training_type\training_type_plugin.py", line 202, in start_training
self._results = trainer.run_stage()
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 1289, in run_stage
return self._run_train()
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 1319, in _run_train
self.fit_loop.run()
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\pytorch_lightning\loops\base.py", line 145, in run
self.advance(*args, **kwargs)
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\pytorch_lightning\loops\fit_loop.py", line 234, in advance
self.epoch_loop.run(data_fetcher)
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\pytorch_lightning\loops\base.py", line 151, in run
output = self.on_run_end()
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\pytorch_lightning\loops\epoch\training_epoch_loop.py", line 298, in on_run_end
self.trainer.call_hook("on_train_epoch_end")
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 1495, in call_hook
callback_fx(*args, **kwargs)
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\pytorch_lightning\trainer\callback_hook.py", line 93, in on_train_epoch_end
callback.on_train_epoch_end(self, self.lightning_module)
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\pytorch_lightning\callbacks\model_checkpoint.py", line 321, in on_train_epoch_end
self.save_checkpoint(trainer)
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\pytorch_lightning\callbacks\model_checkpoint.py", line 400, in save_checkpoint
self._save_last_checkpoint(trainer, monitor_candidates)
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\pytorch_lightning\callbacks\model_checkpoint.py", line 672, in _save_last_checkpoint
trainer.save_checkpoint(filepath, self.save_weights_only)
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 1913, in save_checkpoint
self.checkpoint_connector.save_checkpoint(filepath, weights_only)
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\pytorch_lightning\trainer\connectors\checkpoint_connector.py", line 477, in save_checkpoint
_checkpoint = self.dump_checkpoint(weights_only)
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\pytorch_lightning\trainer\connectors\checkpoint_connector.py", line 416, in dump_checkpoint
model.on_save_checkpoint(checkpoint)
File "D:\RGDiffSR-main\ldm\models\diffusion\ddpm.py", line 586, in on_save_checkpoint
self.cond_stage_model.save_state_dict(
File "D:\RGDiffSR-main\ldm\modules\encoders\tp_generator.py", line 88, in save_state_dict
torch.save(self.crnn_model.state_dict(), os.path.join(path, self.timestamp + f'-e{epoch}.pth'))
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\torch\serialization.py", line 618, in save
with _open_zipfile_writer(f) as opened_zipfile:
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\torch\serialization.py", line 492, in _open_zipfile_writer
return container(name_or_buffer)
File "D:\Anaconda3\envs\RGDiffSR\lib\site-packages\torch\serialization.py", line 463, in __init__
super().__init__(torch._C.PyTorchFileWriter(self.name))
RuntimeError: File reads/2024-02-04T14:26:47-e0.pth cannot be opened.