You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
209 lines
4.8 KiB
Matlab
209 lines
4.8 KiB
Matlab
function [data,class,inTrain,inTest]=load_dataset()
% LOAD_DATASET  Load the data set selected by global_parameters and split it.
%
%   [data,class,inTrain,inTest]=load_dataset() reads GP=global_parameters and
%   loads the data set named by GP.data_set ('USPS', 'ISOLET', 'MNIST',
%   'NORB', 'CIFAR10' or 'ARTIF').
%
%   Outputs:
%     data    - matrix with one column per sample, rescaled to [0,1]. When
%               GP.onoff is true it is replaced by the stacked ON/OFF channels
%               returned by data_preprocess_on_off (twice as many rows); when
%               GP.neg is true (and GP.onoff is false) it is replaced by the
%               mean-centred data returned as that function's second output.
%     class   - 1-by-numSamples row vector of labels renumbered 1..K.
%     inTrain - ) indices returned by
%     inTest  - ) split_data(numSamples,proportionTrain,fixedSplit).
%
%   An error is raised when GP.data_set is not one of the names above.

GP=global_parameters;

proportionTrain=0.75; %proportion of data to use for training (the rest is used for testing)

% fixedSplit is forwarded to split_data. Every data set below that ships with
% a predefined train/test partition sets it to the number of leading training
% samples; presumably split_data treats 0 as "no fixed split" - TODO confirm.
fixedSplit=0;

switch GP.data_set
  case 'USPS'
    S=load('../Data/usps.mat');
    fixedSplit=7291;
    %column 257 holds the label, columns 1:256 hold the pixel values
    class=S.data(:,257)';
    data=S.data(:,1:256)';

  case 'ISOLET'
    data=load('../Data/UCIClassification/isolet1+2+3+4.data');
    test=load('../Data/UCIClassification/isolet5.data');
    fixedSplit=size(data,1);
    data=[data;test];
    %column 618 holds the label, columns 1:617 hold the features
    class=data(:,618)';
    data=data(:,1:617)';

  case 'MNIST'
    trainingImagesPerDigit=2000;
    data=[]; class=[];
    for i=1:10
      S=load(['../Data/LeCun-MNIST_handwrittendigits/digit',int2str(i-1)]);
      class=[class,i*ones(1,trainingImagesPerDigit)];
      data=[data,S.D(1:trainingImagesPerDigit,:)'];
    end
    fixedSplit=size(data,2);
    for i=1:10
      S=load(['../Data/LeCun-MNIST_handwrittendigits/test',int2str(i-1)]);
      class=[class,i*ones(1,size(S.D,1))];
      data=[data,S.D'];
    end

  case 'NORB'
    datafile='../Data/SmallNORB/smallnorb-5x46789x9x18x6x2x96x96-training-dat.mat';
    catfile='../Data/SmallNORB/smallnorb-5x46789x9x18x6x2x96x96-training-cat.mat';
    [data,classTrain]=read_norb_data(datafile,catfile);
    fixedSplit=size(data,2);
    datafile='../Data/SmallNORB/smallnorb-5x01235x9x18x6x2x96x96-testing-dat.mat';
    catfile='../Data/SmallNORB/smallnorb-5x01235x9x18x6x2x96x96-testing-cat.mat';
    [test,classTest]=read_norb_data(datafile,catfile);
    data=[data,test];
    class=[classTrain,classTest];

  case 'CIFAR10'
    data=[]; class=[];
    for i=1:5
      batch=load(['../Data/cifar-10/data_batch_',int2str(i)]);
      data=[data,convert_to_grey(batch.data,[32,32])'];
      class=[class,batch.labels'];
    end
    fixedSplit=size(data,2);
    batch=load('../Data/cifar-10/test_batch');
    data=[data,convert_to_grey(batch.data,[32,32])'];
    class=[class,batch.labels'];
    class=double(class);

  case 'ARTIF'
    [data,class]=artificial_data;
    % NOTE(review): artificial_data currently returns 3 samples, so a fixed
    % split of 5 exceeds the data set size - confirm how split_data copes.
    fixedSplit=5;

  otherwise
    % Fail here with a clear message instead of printing and then crashing
    % further down on an undefined variable.
    error('load_dataset:unknownDataSet','ERROR: unknown data set');
end

%ensure class labels are sequentially numbered starting from 1
% The third output of unique gives each label's rank among the sorted distinct
% labels, which is exactly the 1..K renumbering and cannot merge classes.
[~,~,labelRank]=unique(class);
class=reshape(labelRank,1,[]);

%scale data to range between 0 and 1
% Built-in min/max skip NaN entries, so no toolbox function is needed.
data=data-min(data(:));
data=data./max(data(:));
data(isnan(data))=0; %replace any missing values with zeros

if GP.onoff
  data=data_preprocess_on_off(data);
elseif GP.neg
  [~,data]=data_preprocess_on_off(data);
end

%split data into two sub-sets. inTrain and inTest provide the indices of these datasets
[inTrain,inTest]=split_data(length(class),proportionTrain,fixedSplit);
|
|
|
|
|
|
|
|
|
|
function [data,class]=read_norb_data(datafile,catfile,numPatterns)
% READ_NORB_DATA  Read images and category labels from a pair of NORB files.
%
%   [data,class]=read_norb_data(datafile,catfile) reads the first 200
%   patterns. [data,class]=read_norb_data(datafile,catfile,numPatterns) reads
%   the first numPatterns patterns instead (the original code carried the
%   value 24300 as a commented-out alternative).
%
%   Inputs:
%     datafile    - path of the image file; a 24-byte header is skipped, then
%                   each pattern is stored as two consecutive 96*96 images.
%     catfile     - path of the category file; a 20-byte header is skipped,
%                   then one 'int' label per pattern is read.
%     numPatterns - optional number of patterns to read (default 200).
%
%   Outputs:
%     data  - (96*96)-by-numPatterns matrix; column i is the second image of
%             pair i with its intensities inverted (255-pixel).
%     class - 1-by-numPatterns row vector of labels as stored in catfile.
%
%   Raises an error when a file cannot be opened or holds fewer patterns than
%   requested.

if nargin<3 || isempty(numPatterns)
  numPatterns=200;
end
imagePixels=96*96;

fprintf(1,'loading NORB database: ');

% --- category labels ---
fid=fopen(catfile,'r');
if fid<0
  error('read_norb_data:cannotOpen','Unable to open category file: %s',catfile);
end
closeCat=onCleanup(@() fclose(fid)); %closes the file even if a read fails
fread(fid,5*4,'uchar'); %read header information - and ignore it
class=fread(fid,numPatterns,'int')';
clear closeCat %close the category file now
if numel(class)~=numPatterns
  error('read_norb_data:shortRead', ...
        'Category file %s holds only %d of the %d requested labels', ...
        catfile,numel(class),numPatterns);
end

% --- images ---
fid=fopen(datafile,'r');
if fid<0
  error('read_norb_data:cannotOpen','Unable to open data file: %s',datafile);
end
closeDat=onCleanup(@() fclose(fid)); %runs when the function exits
fread(fid,6*4,'uchar'); %read header information - and ignore it

data=zeros(imagePixels,numPatterns);
for i=1:numPatterns
  if rem(i,1000)==0, fprintf(1,'.%i.',i); end
  %take one image in each pair: the first is read and discarded
  fread(fid,imagePixels);
  I=fread(fid,imagePixels);
  if numel(I)~=imagePixels
    error('read_norb_data:shortRead', ...
          'Data file %s ended while reading pattern %d',datafile,i);
  end
  data(:,i)=255-I(:);
end
|
|
|
|
|
|
function dataOut=convert_to_grey(data,imSize)
% CONVERT_TO_GREY  Turn rows of channel-concatenated colour images into
% rows of greyscale images.
%
%   data    - numImages-by-(3*prod(imSize)) matrix. Each row holds three
%             consecutive runs of prod(imSize) values, one per colour
%             channel, in the order they are handed to rgb2gray.
%   imSize  - two-element size used to reshape each channel.
%   dataOut - numImages-by-prod(imSize) matrix; row j is the column-major
%             unrolling of rgb2gray(im2double(image j)).

[numImages,numValues]=size(data);
dataOut=zeros(numImages,numValues/3);
pixelsPerChannel=prod(imSize);

for row=1:numImages
  %slice the row into its three channels and stack them along dimension 3
  first=reshape(data(row,1:pixelsPerChannel),imSize);
  second=reshape(data(row,pixelsPerChannel+1:2*pixelsPerChannel),imSize);
  third=reshape(data(row,2*pixelsPerChannel+1:3*pixelsPerChannel),imSize);
  colourImage=cat(3,first,second,third);

  greyImage=rgb2gray(im2double(colourImage));
  dataOut(row,:)=reshape(greyImage,1,[]);
end
|
|
|
|
|
|
function [X,data]=data_preprocess_on_off(data)
% DATA_PREPROCESS_ON_OFF  Mean-centre each column and split it into ON and
% OFF channels.
%
%   [X,data]=data_preprocess_on_off(data)
%
%   Input:
%     data - matrix whose columns are processed independently.
%   Outputs:
%     X    - [xOn;xOff], twice as many rows as the input. xOn keeps the
%            positive part of the centred data and xOff keeps the magnitude
%            of the negative part; all other entries are zero.
%     data - the centred data: 2*(input - column mean).

% The mean is taken explicitly along dimension 1. Without the dimension
% argument a single-row input would be averaged along the row instead, so the
% centring would silently change meaning for such input.
data=2.*bsxfun(@minus,data,mean(data,1));

xOn=data;
xOn(data<0)=0;

xOff=-data;
xOff(data>0)=0;

X=[xOn;xOff];
|
|
|
|
|
|
|
|
function [data,class]=artificial_data
% ARTIFICIAL_DATA  Build a tiny synthetic data set of centred squares.
%
%   [data,class]=artificial_data returns
%     data  - 121-by-3 matrix; column p is an 11-by-11 binary image unrolled
%             column-major, containing a filled square centred on pixel (6,6)
%             with side length 1, 3 and 5 respectively.
%     class - the labels [1,1,2] for the three images.
%
%   Two blocks that were permanently disabled with "if 0" have been removed
%   as unreachable: an alternative set of five line/border patterns
%   (duplicated to ten samples) and a random pixel-flip noise step applied to
%   columns 6:10.

imSide=11;
centre=6;
halfWidths=[0,1,2]; %squares of side 1, 3 and 5

data=zeros(imSide*imSide,numel(halfWidths));
for p=1:numel(halfWidths)
  I=zeros(imSide,imSide);
  span=centre-halfWidths(p):centre+halfWidths(p);
  I(span,span)=1;
  data(:,p)=I(:);
end

class=[1,1,2];