diff --git a/README.md b/README.md index f0f50e60..645224d9 100644 --- a/README.md +++ b/README.md @@ -115,4 +115,48 @@ Finally, DeepLense help combat the problem of noisy and low-resolution of real l **Pranath Reddy** performs a comparative study of the super-resolution of strong lensing images in their [GSoC 2023 project](https://summerofcode.withgoogle.com/archive/2023/projects/Rh8kJLr4), using Residual Models with Content Loss and Conditional Diffusion Models, on the Model 1 dataset. #### 3.3.3 Physics-Informed Unsupervised Super-Resolution of Strong Lensing Images -**Anirudh Shankar** explores the unsupervised super-resolution of strong lensing images through a Physics-Informed approach in his [GSoC 2024 project](https://summerofcode.withgoogle.com/programs/2024/projects/AvlaMMJJ), built to handle sparse datasets. They use custom datasets using different lens models and light profiles. \ No newline at end of file +**Anirudh Shankar** explores the unsupervised super-resolution of strong lensing images through a Physics-Informed approach in his [GSoC 2024 project](https://summerofcode.withgoogle.com/programs/2024/projects/AvlaMMJJ), built to handle sparse datasets. They use custom datasets using different lens models and light profiles. +## 🆕 Loading `.npy` Dataset + +This repository now supports loading datasets stored in `.npy` format using a custom PyTorch Dataset class. 
+
+### 📌 Usage
+
+```python
+from npy_dataset import NPYDataset
+
+# Initialize dataset
+dataset = NPYDataset(root_dir="path/to/npy_files")
+
+# Get one sample
+image, label = dataset[0]
+
+print(image.shape, label)
+```
+
+### 📂 Supported Formats
+
+* `(H, W)` → Grayscale images
+* `(H, W, C)` → RGB / multi-channel images
+
+### ⚠️ Notes
+
+* Ensure all `.npy` files contain valid NumPy arrays
+* All files should have consistent shape
+* Labels can be:
+
+  * derived from the class subdirectory each file lives in (e.g., `cat/1.npy` → class `cat`)
+  * or handled separately in your dataset class
+
+### 💡 Example Directory Structure
+
+```
+data/
+├── cat/
+│   ├── 1.npy
+│   └── 2.npy
+├── dog/
+│   └── 1.npy
+```
+
+### 🚀 Benefit
+
+This feature allows direct training on `.npy` datasets without converting them into image formats, improving efficiency and flexibility.
diff --git a/npy_dataset.py b/npy_dataset.py
new file mode 100644
index 00000000..d3969e8c
--- /dev/null
+++ b/npy_dataset.py
@@ -0,0 +1,50 @@
+import os
+import torch
+from torch.utils.data import Dataset
+import numpy as np
+
+class NPYDataset(Dataset):
+    """
+    Custom PyTorch Dataset for loading .npy-based DeepLense data.
+ """ + + def __init__(self, root_dir): + self.samples = [] + + # Only include directories (ignore files like dataset.py) + self.class_names = [ + d for d in os.listdir(root_dir) + if os.path.isdir(os.path.join(root_dir, d)) + ] + + self.class_to_idx = {cls: i for i, cls in enumerate(self.class_names)} + + for cls in self.class_names: + class_dir = os.path.join(root_dir, cls) + + for file in os.listdir(class_dir): + if file.endswith(".npy"): + self.samples.append( + (os.path.join(class_dir, file), self.class_to_idx[cls]) + ) + + def __len__(self): + return len(self.samples) + + def __getitem__(self, idx): + file_path, label = self.samples[idx] + + data = np.load(file_path) + data = torch.tensor(data, dtype=torch.float32) + + # Ensure shape is compatible with CNN (3 channels, 224x224) + if len(data.shape) == 2: + data = data.unsqueeze(0) + + data = torch.nn.functional.interpolate( + data.unsqueeze(0), size=(224, 224), mode="bilinear" + ).squeeze(0) + + data = data.repeat(3, 1, 1) + + return data, label