使用PyTorch将文件夹下的图片分为训练集和验证集实例
Python  /  管理员 发布于 5年前   336
PyTorch(torchvision)提供了 ImageFolder 类,用来加载文件结构如下的图片数据集:
root/dog/xxx.png
root/dog/xxy.png
root/dog/xxz.png
root/cat/123.png
root/cat/nsdf3.png
root/cat/asd932_.png
使用这个类的问题在于无法将训练集(training dataset)和验证集(validation dataset)分开。我写了两个类来完成这个工作。
import os

import torch
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import ToTensor, Resize, Compose
from PIL import Image
from sklearn.model_selection import train_test_split


class ImageFolderSplitter:
    """Collect image paths from a class-per-folder tree and split them.

    Images must be placed in folders like::

        root/dogs/image1.png
        root/dogs/image2.png
        root/cats/image1.png
        root/cats/image2.png

    Each direct sub-folder of ``path`` is one class. Class indices are
    assigned in sorted folder-name order so they are stable between runs.

    Args:
        path: the root of the image folder.
        train_size: forwarded to ``train_test_split`` as ``train_size``.

    Raises:
        RuntimeError: if the root contains files or no class folders, or if
            a class folder is empty or contains sub-folders.
    """

    def __init__(self, path, train_size=0.8):
        self.path = path
        self.train_size = train_size
        self.class2num = {}
        self.num2class = {}
        self.class_nums = {}
        self.data_x_path = []
        self.data_y_label = []
        self.x_train = []
        self.x_valid = []
        self.y_train = []
        self.y_valid = []

        class_names, stray_files = self._list_dir(path)
        # The root may only hold class folders, and needs at least one.
        if stray_files or not class_names:
            raise RuntimeError("please check the folder structure!")

        for label, class_name in enumerate(class_names):
            self.num2class[label] = class_name
            self.class2num[class_name] = label

            class_dir = os.path.join(path, class_name)
            nested_dirs, files = self._list_dir(class_dir)
            # A class folder must be flat and hold at least one image.
            if nested_dirs or not files:
                raise RuntimeError("please check the folder structure!")

            # The label comes from the folder being listed, not from a
            # substring search over the path, so "cat" never matches "cats".
            self.data_x_path.extend(
                os.path.join(class_dir, file_name) for file_name in files
            )
            self.data_y_label.extend([label] * len(files))
            self.class_nums[label] = len(files)

        (
            self.x_train,
            self.x_valid,
            self.y_train,
            self.y_valid,
        ) = train_test_split(
            self.data_x_path,
            self.data_y_label,
            shuffle=True,
            train_size=self.train_size,
        )

    @staticmethod
    def _list_dir(folder):
        """Return ``(sub-folder names, file names)`` of ``folder``, each sorted."""
        dir_names = []
        file_names = []
        with os.scandir(folder) as entries:
            for entry in entries:
                if entry.is_dir():
                    dir_names.append(entry.name)
                else:
                    file_names.append(entry.name)
        return sorted(dir_names), sorted(file_names)

    def getTrainingDataset(self):
        """Return ``(paths, labels)`` of the training split."""
        return self.x_train, self.y_train

    def getValidationDataset(self):
        """Return ``(paths, labels)`` of the validation split."""
        return self.x_valid, self.y_valid


class DatasetFromFilename(Dataset):
    """Dataset that loads images lazily from a list of file paths.

    Args:
        x: a list of image file full path.
        y: a list of image categories.
        transforms: callable applied to each RGB PIL image; defaults to
            ``ToTensor()`` when omitted.
    """

    def __init__(self, x, y, transforms=None):
        super(DatasetFromFilename, self).__init__()
        self.x = x
        self.y = y
        if transforms is None:
            self.transforms = ToTensor()
        else:
            self.transforms = transforms

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        """Return ``(transformed image, label tensor)`` for sample ``idx``."""
        # Close the file as soon as the pixels are converted; convert()
        # returns a new in-memory image, so nothing stays open per sample.
        with Image.open(self.x[idx]) as source:
            img = source.convert("RGB")
        return self.transforms(img), torch.tensor([[self.y[idx]]])


# test code
# splitter = ImageFolderSplitter("for_test")
# transforms = Compose([Resize((51, 51)), ToTensor()])
# x_train, y_train = splitter.getTrainingDataset()
# training_dataset = DatasetFromFilename(x_train, y_train, transforms=transforms)
# training_dataloader = DataLoader(training_dataset, batch_size=2, shuffle=True)
# x_valid, y_valid = splitter.getValidationDataset()
# validation_dataset = DatasetFromFilename(x_valid, y_valid, transforms=transforms)
# validation_dataloader = DataLoader(validation_dataset, batch_size=2, shuffle=True)
# for x, y in training_dataloader:
#     print(x.shape, y.shape)
更多的代码可以在我的 GitHub repo 下找到。
122 在
学历:一种延缓就业设计,生活需求下的权衡之选中评论 工作几年后,报名考研了,到现在还没认真学习备考,迷茫中。作为一名北漂互联网打工人..123 在
Clash for Windows作者删库跑路了,github已404中评论 按理说只要你在国内,所有的流量进出都在监控范围内,不管你怎么隐藏也没用,想搞你分..原梓番博客 在
在Laravel框架中使用模型Model分表最简单的方法中评论 好久好久都没看友情链接申请了,今天刚看,已经添加。..博主 在
佛跳墙vpn软件不会用?上不了网?佛跳墙vpn常见问题以及解决办法中评论 @1111老铁这个不行了,可以看看近期评论的其他文章..1111 在
佛跳墙vpn软件不会用?上不了网?佛跳墙vpn常见问题以及解决办法中评论 网站不能打开,博主百忙中能否发个APP下载链接,佛跳墙或极光..
Copyright·© 2019 侯体宗版权所有·
粤ICP备20027696号