478 lines
17 KiB
Matlab
478 lines
17 KiB
Matlab
classdef ustar < handle
|
|
% Low-level utilities for UNIX standard tar files.
|
|
properties(Access=public)
    % Path of the tar archive handed to open()/openIndexed()
    File
    % Path of the index file handed to openIndexed()
    IndexFile
    % Access mode of the archive (set to 'read' by open()/openIndexed())
    IOMode
    % Number of subfiles found in the archive or listed in the index
    NumberOfSubfiles
end
|
|
properties(Access=private)
    % --- Archive state ---------------------------------------------------
    fileID              % file identifier returned by fopen for the archive
    ioflag              % permission string passed to fopen
    subFile             % cell array with the names of all subfiles
    subFileBeg          % byte offset of each subfile's data in the archive
    subFileSize         % size in bytes of each subfile
    % --- Header fields of the most recently parsed subfile ---------------
    currentFile
    currentMode
    currentUID
    currentGID
    currentFileSize
    currentModtime
    currentLink
    currentLinkname
    currentUsername
    currentGroupname
    currentDevMajor
    currentDevMinor
    currentFileBeg
    % --- Constants -------------------------------------------------------
    % Number of table entries preallocated by the scanner; the scanner
    % issues a warning when an archive contains more subfiles than this.
    scanBuffSize = 131072;
    % Number of bytes copied per read/write cycle when extracting (4 MiB)
    extrBuffSize = 4*1024^2;
    % ustar block size in bytes (do not change)
    blockSize = 512;
end
|
|
%% ------------------------------------------------------------------------%%
|
|
%% CONSTRUCTORS/DESTRUCTORS %%
|
|
%% ------------------------------------------------------------------------%%
|
|
methods(Access=public)
|
|
function obj = ustar()
    % obj = ustar()
    % Default constructor. The returned object is not attached to any
    % archive yet; call open() or openIndexed() to attach one.
    obj.resetPublicProperties();
    obj.resetPrivateProperties();
    obj.resetCurrent();
    % Mark the object as "no file open". close() (and therefore delete())
    % tests fileID<0; with an empty fileID that test is false and fclose
    % would be called on an empty identifier.
    obj.fileID = -1;
end
|
|
function delete(obj)
    % obj.delete()
    % Destructor: makes sure the archive is closed when the object is
    % destroyed.
    close(obj);
end
|
|
end
|
|
%% ------------------------------------------------------------------------%%
|
|
%% INITIALIZATION METHODS %%
|
|
%% ------------------------------------------------------------------------%%
|
|
methods(Access=public)
|
|
function open(obj,file)
    % obj.open(file)
    % Opens a tar archive in read-only mode and scans it for subfiles.
    % Input
    %   file      path to the tar archive
    % Raises an error if the archive cannot be opened.

    % Release a handle left over from an earlier open()/openIndexed()
    % call; it would otherwise be overwritten below and leak.
    if ~isempty(obj.fileID) && obj.fileID>=0
        obj.close();
    end
    obj.File = file;
    obj.IOMode = 'read';
    obj.ioflag = 'r';
    obj.fileID = fopen(obj.File,obj.ioflag);
    if obj.fileID<0
        error('Unable to open file: %s',obj.File);
    end
    % Build the subfile tables by walking through all headers
    obj.scanArchive();
    obj.resetCurrent();
end
|
|
function openIndexed(obj,tarfile,indexfile)
    % obj.openIndexed(tarfile,indexfile)
    % Opens a tar archive in read-only mode and takes the subfile tables
    % from an index file instead of scanning the archive.
    % Input
    %   tarfile     path to TAR file
    %   indexfile   path to index file; the format is selected by the
    %               extension ('.simplejson', '.msgpack' or '.taridx')
    % Raises an error if the archive cannot be opened.

    % Release a handle left over from an earlier open()/openIndexed()
    % call; it would otherwise be overwritten below and leak.
    if ~isempty(obj.fileID) && obj.fileID>=0
        obj.close();
    end
    obj.File = tarfile;
    obj.IndexFile = indexfile;
    obj.IOMode = 'read';
    obj.ioflag = 'r';
    obj.fileID = fopen(obj.File,obj.ioflag);
    if obj.fileID<0
        error('Unable to open file: %s',obj.File);
    end
    obj.scanIndexFile();
    obj.resetCurrent();
end
|
|
end
|
|
%% ------------------------------------------------------------------------%%
|
|
%% PUBLIC METHODS %%
|
|
%% ------------------------------------------------------------------------%%
|
|
methods(Access=public)
|
|
function close(obj)
    % obj.close()
    % Closes the archive (if one is open) and resets the object state.
    % If the file cannot be closed, a warning is issued and the state is
    % left untouched.

    % Nothing to do when no file is open. The isempty test covers objects
    % whose fileID has never been assigned: "[]<0" evaluates to false in
    % an if-statement and would let fclose run on an empty identifier.
    if isempty(obj.fileID) || obj.fileID<0
        return;
    end
    status = fclose(obj.fileID);
    if status<0
        warning('Unable to close file (exit code: %d)',status);
        return;
    end
    obj.resetPublicProperties();
    obj.resetPrivateProperties();
    obj.resetCurrent();
    obj.fileID = -1;
end
|
|
function [ptr] = pointer(obj,fname)
    % [ptr] = obj.pointer(fname)
    % Builds a 'pointer' to a subfile so that its data can be read
    % straight from the tar-ball without extracting it.
    % Input
    %   fname   name of the subfile inside the tar-ball
    % Output
    %   ptr     [file identifier, offset of first data byte, size in bytes]
    k = findSubfile(obj,fname);
    firstByte = obj.subFileBeg(k);
    nbytes = obj.subFileSize(k);
    ptr = [obj.fileID,firstByte,nbytes];
end
|
|
function [fname,fsize] = list(obj)
    % [fname,fsize] = obj.list()
    % Lists all subfiles of the tar-ball together with their sizes.
    % Output
    %   fname   cell array with the subfile names
    %   fsize   array with the subfile sizes in bytes
    fsize = obj.subFileSize;
    fname = obj.subFile;
end
|
|
function extract(obj,fname,varargin)
    % obj.extract(fname)
    % obj.extract(fname,'outfile',path)
    % Extracts the requested subfile to a standalone file.
    % Input
    %   fname     name of subfile
    %   ? outfile path of output file (default: fname)
    % Raises an error if the subfile is unknown, the output file cannot
    % be created, or the archive ends before all data has been read.
    par = inputParser;
    addParamValue(par,'outfile',fname,@ischar);
    parse(par,varargin{:});
    outfile = par.Results.outfile;

    idx = obj.findSubfile(fname);
    fbeg = obj.subFileBeg(idx);
    fsize = obj.subFileSize(idx);

    fidw = fopen(outfile,'w');
    if fidw<0
        error('Unable to open file: %s',outfile);
    end
    % Close the output file on every exit path, including errors below
    closeOutput = onCleanup(@() fclose(fidw));

    if fseek(obj.fileID,fbeg,'bof')~=0
        error('Unable to seek to data of subfile: %s',fname);
    end

    % Copy the data in chunks of at most extrBuffSize bytes
    nremain = fsize;
    while nremain>0
        nread = min(nremain,obj.extrBuffSize);
        buff = fread(obj.fileID,[1,nread],'*uint8');
        if numel(buff)~=nread
            error('Unexpected end of archive while extracting: %s',fname);
        end
        fwrite(fidw,buff);
        nremain = nremain-nread;
    end
end
|
|
function [flag] = isSubfile(obj,fname)
    % [flag] = obj.isSubfile(fname)
    % Tells whether a subfile with the given name is part of the tar-ball.
    % Input
    %   fname   name of subfile
    % Output
    %   flag    true if at least one subfile carries that name
    isHit = ismember(obj.subFile,fname);
    flag = any(isHit);
end
|
|
function writeIndex(obj,outfile)
    % obj.writeIndex(outfile)
    % Write an index file for the tar archive in custom '.taridx' format.
    % The format is:
    %   nsubfile        int64
    %   [nsubfile times]
    %   subFileName     256*char
    %   subFileBeg      int64
    %   subFileSize     int64
    % Input
    %   outfile   name of index file to be written (with extension '.taridx')
    % Raises an error if a subfile name does not fit into the name field
    % or if the output file cannot be created.
    nameFieldLen = 256;
    % Count the entries that are actually stored instead of trusting the
    % publicly writable NumberOfSubfiles property.
    nsub = numel(obj.subFile);

    % Validate before creating the file: a longer name would silently
    % enlarge the record and break the fixed-size layout.
    for ii=1:nsub
        if length(obj.subFile{ii})>nameFieldLen
            error('Subfile name exceeds %d characters: %s',nameFieldLen,obj.subFile{ii});
        end
    end

    fid = fopen(outfile,'w');
    if fid<0
        error('Unable to open file: %s',outfile);
    end
    % Close the index file on every exit path
    closeIndex = onCleanup(@() fclose(fid));

    fwrite(fid,nsub,'int64');
    for ii=1:nsub
        nchar = length(obj.subFile{ii});
        record = blanks(nameFieldLen);
        record(1:nchar) = obj.subFile{ii};
        if nchar<nameFieldLen
            record(nchar+1) = char(0); % terminator, only if there is room
        end
        % Write one byte per character so that the field is always
        % exactly nameFieldLen bytes long.
        fwrite(fid,uint8(record),'uint8');
        fwrite(fid,obj.subFileBeg(ii),'int64');
        fwrite(fid,obj.subFileSize(ii),'int64');
    end
end
|
|
function [fname,foffset,fsize,nfile] = dumpIndex(obj)
    % [fname,foffset,fsize,nfile] = obj.dumpIndex()
    % Returns the indexing data currently held for the tar file.
    % Output
    %   fname     cell array of file names
    %   foffset   offset of each file's data within the tar file
    %   fsize     size of each file's data
    %   nfile     number of files in archive
    fname = obj.subFile;
    foffset = obj.subFileBeg;
    fsize = obj.subFileSize;
    nfile = obj.NumberOfSubfiles;
end
|
|
end
|
|
%% ------------------------------------------------------------------------%%
|
|
%% PRIVATE METHODS %%
|
|
%% ------------------------------------------------------------------------%%
|
|
methods(Access=private)
|
|
function scanArchive(obj)
    % obj.scanArchive()
    % Scans the tar-ball for subfiles and stores name, data offset and
    % size of every subfile in the class variables.

    % Preallocate the tables; they grow automatically if the archive
    % holds more than scanBuffSize entries.
    names = cell(obj.scanBuffSize,1);
    offsets = zeros(obj.scanBuffSize,1);
    sizes = zeros(obj.scanBuffSize,1);

    % Jump to start of file
    fseek(obj.fileID,0,'bof');

    % Loop over (unknown) number of subfiles and evaluate header
    nsub = 0;
    while ~obj.checkEOF()
        obj.readHeader(true);
        if isnan(obj.currentFileSize)
            error('Invalid size field in header of subfile: %s',obj.currentFile);
        end
        nsub = nsub+1;
        names{nsub} = obj.currentFile;
        sizes(nsub) = obj.currentFileSize;
        offsets(nsub) = obj.currentFileBeg;
        % Data is stored in whole blocks; skip to the next header
        nblock = ceil(obj.currentFileSize/obj.blockSize);
        if fseek(obj.fileID,nblock*obj.blockSize,'cof')~=0
            % The seek failed, so the file position still points at this
            % subfile's data; continuing would parse that data as a header.
            warning('Archive appears to be truncated within subfile: %s',obj.currentFile);
            break;
        end
    end

    % Truncate preallocated arrays
    obj.NumberOfSubfiles = nsub;
    obj.subFile = names(1:nsub);
    obj.subFileSize = sizes(1:nsub);
    obj.subFileBeg = offsets(1:nsub);
    if obj.NumberOfSubfiles>obj.scanBuffSize
        warning('Number of subfiles exceeds scanBuffSize.');
    end
    obj.resetCurrent();
end
|
|
function scanIndexFile(obj)
    % obj.scanIndexFile()
    % Reads tar meta-data from the index file into class variables.
    % The decoder is chosen by the extension of obj.IndexFile:
    %   '.simplejson'   JSON object, one field per subfile with the
    %                   members 'offset' and 'size'
    %   '.msgpack'      msgpack map: name -> {offset,size}
    %   '.taridx'       binary format as produced by writeIndex()
    % Raises an error for unknown extensions, unreadable or truncated
    % index files, or when no suitable decoder is available.

    % Check encoding of indexing file
    [~,~,fileExtension] = fileparts(obj.IndexFile);
    switch fileExtension
        case '.simplejson'
            % Open and read file contents (text, one byte per character)
            indexfileID = fopen(obj.IndexFile,'r');
            if indexfileID<0
                error('Unable to open file: %s',obj.IndexFile);
            end
            closeIndex = onCleanup(@() fclose(indexfileID));
            jsonstr = fread(indexfileID,[1,Inf],'uint8=>char');
            % Parse JSON and reconstruct filenames
            if ~isempty(which('jsonlab.loadjson'))
                % User function from matlab central
                % This function is preferred, since filenames can be
                % reconstructed safely from parsed JSON (. <=> _0x2E_)
                json = jsonlab.loadjson(jsonstr);
                jsonFields = fieldnames(json);
                tarFileName = strrep(jsonFields,'_0x2E_','.');
            elseif ~isempty(which('jsondecode'))
                % Built-in function
                % Second choice only, since filename might be ambiguous
                % if it has no extension, but contains underscore. (. => _)
                json = jsondecode(jsonstr);
                jsonFields = fieldnames(json);
                tarFileName = jsonFields;
                for ifield=1:numel(jsonFields)
                    % Heuristic: the last underscore stands for the dot
                    % in front of the file extension.
                    idot = find(jsonFields{ifield}=='_',1,'last');
                    if ~isempty(idot)
                        tarFileName{ifield}(idot) = '.';
                    end
                end
            else % no JSON decoder available
                error('No JSON decoder available.');
            end
            % Extract important fields. Preallocating also defines the
            % arrays when the index contains no entries at all.
            nsubfile = numel(jsonFields);
            tarFileOffset = zeros(1,nsubfile);
            tarFileSize = zeros(1,nsubfile);
            for isub=1:nsubfile
                entry = json.(jsonFields{isub});
                tarFileOffset(isub) = entry.offset;
                tarFileSize(isub) = entry.size;
            end
        case '.msgpack'
            % Open and read file contents (binary)
            indexfileID = fopen(obj.IndexFile,'rb');
            if indexfileID<0
                error('Unable to open file: %s',obj.IndexFile);
            end
            closeIndex = onCleanup(@() fclose(indexfileID));
            msgbytes = fread(indexfileID,'uint8=>uint8');
            % Parse msgpack
            if ~isempty(which('msgpack.parsemsgpack'))
                msg = msgpack.parsemsgpack(msgbytes);
                tarFileName = msg.keys;
                nsubfile = length(tarFileName);
                tarFileSize = zeros(1,nsubfile);
                tarFileOffset = zeros(1,nsubfile);
                for isub=1:nsubfile
                    tmp = msg(tarFileName{isub});
                    tarFileOffset(isub) = double(tmp{1});
                    tarFileSize(isub) = double(tmp{2});
                end
            else % no msgpack decoder available
                error('No msgpack decoder available.');
            end
        case '.taridx'
            % Open and read file contents (binary)
            indexfileID = fopen(obj.IndexFile,'rb');
            if indexfileID<0
                error('Unable to open file: %s',obj.IndexFile);
            end
            closeIndex = onCleanup(@() fclose(indexfileID));
            nsubfile = fread(indexfileID,1,'int64=>double');
            if isempty(nsubfile) || nsubfile<0
                error('Invalid or truncated index file: %s',obj.IndexFile);
            end
            tarFileName = cell(1,nsubfile);
            tarFileSize = zeros(1,nsubfile);
            tarFileOffset = zeros(1,nsubfile);
            for isub=1:nsubfile
                rawName = fread(indexfileID,[1,256],'uint8=>char');
                rawOffset = fread(indexfileID,1,'int64=>double');
                rawSize = fread(indexfileID,1,'int64=>double');
                if numel(rawName)~=256 || isempty(rawOffset) || isempty(rawSize)
                    error('Invalid or truncated index file: %s',obj.IndexFile);
                end
                % Strip the padding that follows the name
                tarFileName{isub} = deblank(rawName);
                tarFileOffset(isub) = rawOffset;
                tarFileSize(isub) = rawSize;
            end
        otherwise
            error('Unknown file extension of index file: %s',fileExtension);
    end

    % Order by offset, i.e. file order within tarball and assign
    % to class variables (always as column vectors)
    [~,idxsort] = sort(tarFileOffset);
    tarFileName = tarFileName(idxsort);
    tarFileOffset = tarFileOffset(idxsort);
    tarFileSize = tarFileSize(idxsort);
    obj.subFile = tarFileName(:);
    obj.subFileBeg = tarFileOffset(:);
    obj.subFileSize = tarFileSize(:);
    obj.NumberOfSubfiles = nsubfile;
end
|
|
function readHeader(obj,scanMode)
    % obj.readHeader(scanMode)
    % Reads header data of a subfile in tar-ball and stores information
    % in 'current*' class-variables. On return the file position is at
    % the first data byte of the subfile.
    % Input
    %   scanMode   when set to true, omit parts which are not needed during scan
    % Raises an error if the header block is incomplete, the checksum
    % does not match, or the magic field does not contain 'ustar'.

    % Read the raw bytes unsigned: the header checksum is a sum of
    % unsigned byte values, and a signed read does not preserve bytes
    % above 127 (e.g. in non-ASCII file names).
    header = fread(obj.fileID,[1,obj.blockSize],'uint8=>char');
    if numel(header)~=obj.blockSize
        error('Unexpected end of archive while reading a header block.');
    end

    % Extract header information
    name = header(1:100);
    fmode = header(101:108);
    uid = header(109:116);
    gid = header(117:124);
    fsize = header(125:136);
    mtime = header(137:148);
    chksum = header(149:156);
    link = header(157);
    linkname = header(158:257);
    magic = header(258:263);
    uname = header(266:297);
    gname = header(298:329);
    devmajor = header(330:337);
    devminor = header(338:345);
    prefix = header(346:500);

    % Evaluate checksum
    chksum1 = ustar.computeChecksum(header);
    chksum2 = ustar.parseOctalStr(chksum);
    if chksum1~=chksum2
        error('Checksum mismatch! %d,%d',chksum1,chksum2);
    end

    % Evaluate magic (substring test, so a padded magic is accepted too)
    if isempty(strfind(ustar.parseStr(magic),'ustar'))
        error(' Not a UNIX standard tar file.')
    end

    % Parse header information.
    % In the ustar format a non-empty prefix is joined to the name with
    % a slash to form the full path.
    prefixStr = ustar.parseStr(prefix);
    nameStr = ustar.parseStr(name);
    if isempty(prefixStr)
        obj.currentFile = nameStr;
    else
        obj.currentFile = [prefixStr,'/',nameStr];
    end
    obj.currentFileBeg = ftell(obj.fileID);
    obj.currentFileSize = ustar.parseOctalStr(fsize);
    if ~scanMode
        obj.currentMode = ustar.parseStr(fmode);
        obj.currentUID = ustar.parseOctalStr(uid);
        obj.currentGID = ustar.parseOctalStr(gid);
        % mtime is given in seconds since 1970-01-01
        obj.currentModtime = datestr(ustar.parseOctalStr(mtime)/86400+datenum(1970,1,1));
        obj.currentLink = ustar.parseOctalStr(link);
        obj.currentLinkname = ustar.parseStr(linkname);
        obj.currentUsername = ustar.parseStr(uname);
        obj.currentGroupname = ustar.parseStr(gname);
        obj.currentDevMajor = ustar.parseOctalStr(devmajor);
        obj.currentDevMinor = ustar.parseOctalStr(devminor);
    end
end
|
|
function [isEOF] = checkEOF(obj)
    % [isEOF] = obj.checkEOF()
    % Checks if the end of the archive is reached. This is the case when
    %   - the next block cannot be read completely (physical end of file),
    %   - the next two blocks consist of binary zeros only, or
    %   - one block of binary zeros is followed by the physical end of
    %     file (trailer cut short).
    % If the end is not reached, the file position is restored so that
    % the next header can be read.
    % Output
    %   isEOF     flag which indicates end-of-file
    isZeroBlock = @(blk) numel(blk)==obj.blockSize && ~any(blk);

    curPosition = ftell(obj.fileID);
    blockcur = fread(obj.fileID,[1,obj.blockSize],'int8=>int8');
    isEOF = logical(feof(obj.fileID));
    if isZeroBlock(blockcur)
        blocknext = fread(obj.fileID,[1,obj.blockSize],'int8=>int8');
        % A second zero block is the regular end marker. An incomplete
        % second block cannot hold a header either, so it is treated as
        % the end too instead of parsing the zero block as a header.
        if isZeroBlock(blocknext) || numel(blocknext)<obj.blockSize
            isEOF = true;
            return;
        end
    end
    fseek(obj.fileID,curPosition,'bof');
end
|
|
function [idx] = findSubfile(obj,fname)
    % [idx] = obj.findSubfile(fname)
    % Get index of requested subfile
    % Input
    %   fname   name of subfile
    % Output
    %   idx     index of subfile (scalar). If several subfiles carry the
    %           same name, a warning is issued and the last one in the
    %           archive is selected.
    % Raises an error if no subfile with that name exists.
    isReqFile = ismember(obj.subFile,fname);
    nmatch = sum(isReqFile);
    if nmatch==0
        error('File not found: %s',fname);
    elseif nmatch>1
        warning('More than one matching file found.');
    end
    % Always return a scalar: callers use idx to build a seek offset and
    % a [fid,offset,size] triple, both of which break on a vector index.
    idx = find(isReqFile,1,'last');
end
|
|
function resetPublicProperties(obj)
    % obj.resetPublicProperties()
    % Clears all public properties.
    obj.File = [];
    % IndexFile is set by openIndexed(); clear it as well so that it does
    % not keep a stale name after the archive has been closed.
    obj.IndexFile = [];
    obj.IOMode = [];
    obj.NumberOfSubfiles = [];
end
|
|
function resetPrivateProperties(obj)
    % obj.resetPrivateProperties()
    % Clears the I/O flag and the subfile tables. The file identifier is
    % not touched here.
    propNames = {'ioflag','subFile','subFileBeg','subFileSize'};
    for iprop=1:numel(propNames)
        obj.(propNames{iprop}) = [];
    end
end
|
|
function resetCurrent(obj)
    % obj.resetCurrent()
    % Clears every 'current*' property, i.e. the header information of
    % the most recently parsed subfile.
    suffixes = {'File','Mode','UID','GID','FileSize','Modtime','Link', ...
                'Linkname','Username','Groupname','DevMajor','DevMinor', ...
                'FileBeg'};
    for isuf=1:numel(suffixes)
        obj.(['current',suffixes{isuf}]) = [];
    end
end
|
|
end
|
|
%% ------------------------------------------------------------------------%%
|
|
%% PRIVATE STATIC METHODS %%
|
|
%% ------------------------------------------------------------------------%%
|
|
methods(Access=private,Static)
|
|
function [chksum] = computeChecksum(block)
    % [chksum] = ustar.computeChecksum(block)
    % Sums the character codes of a header block, with the checksum
    % field itself (positions 149 to 156) counted as spaces.
    % Input
    %   block     header block as character array
    % Output
    %   chksum    sum of all character codes
    chksumField = 149:156;
    block(chksumField) = char(32);
    chksum = sum(double(block));
end
|
|
function [str] = parseStr(str)
    % [str] = ustar.parseStr(str)
    % Removes every NUL character from a header field.
    % Input
    %   str   raw header field
    % Output
    %   str   field content without NUL characters
    nul = char(0);
    str = strrep(str,nul,'');
end
|
|
function [num] = parseOctalStr(str)
    % [num] = ustar.parseOctalStr(str)
    % Converts a numeric header field (octal digits, padded with NUL
    % and/or whitespace) to a number.
    % Input
    %   str   raw header field
    % Output
    %   num   decoded value; 0 for a blank field, NaN if the field
    %         contains anything but octal digits
    digitStr = strtrim(ustar.parseStr(str));
    if isempty(digitStr)
        % Blank field: treat as zero instead of feeding NaN into the
        % conversion.
        num = 0;
        return;
    end
    if any(digitStr<'0' | digitStr>'7')
        num = NaN;
        return;
    end
    % Positional evaluation, most significant digit first
    weights = 8.^(numel(digitStr)-1:-1:0);
    num = sum((digitStr-'0').*weights);
end
|
|
function [dec] = oct2dec_long(oct)
    % [dec] = ustar.oct2dec_long(oct)
    % Converts a number whose decimal digits are to be read as octal
    % digits (e.g. 644) to its value (e.g. 420).
    % Input
    %   oct   non-negative number made up of octal digits
    % Output
    %   dec   decoded value; NaN if oct is NaN, infinite or negative

    % Without this guard the loop below never terminates for NaN/Inf
    % (the loop condition stays true) and yields garbage for negative
    % input.
    if isnan(oct) || isinf(oct) || oct<0
        dec = NaN;
        return;
    end
    dec = 0;
    ii = 1;
    % Peel off one decimal digit per iteration, least significant first
    while floor(oct/10^(ii-1))~=0
        cbase = 8^(ii-1);
        cfact = floor(mod(oct,10^ii)/10^(ii-1));
        dec = dec + cfact*cbase;
        ii = ii+1;
    end
end
|
|
end
|
|
end
|