classdef ustar < handle
    % Low-level utilities for UNIX standard tar (ustar) files.
    %
    % The class opens a tar archive read-only, builds a table of the
    % contained subfiles (name, data offset, data size) either by scanning
    % the archive headers or by loading a precomputed index file, and
    % offers lookup, listing, extraction and index export on that table.

    properties (Access = public)
        File              % tar file name
        IndexFile         % index file name
        IOMode            % file opened in read-only or read-write mode?
        NumberOfSubfiles  % number of subfiles
    end

    properties (Access = private)
        % File info
        fileID = -1;      % -1 marks "no archive open" (checked by close)
        ioflag
        subFile
        subFileBeg
        subFileSize
        % Current subfile information
        currentFile
        currentMode
        currentUID
        currentGID
        currentFileSize
        currentModtime
        currentLink
        currentLinkname
        currentUsername
        currentGroupname
        currentDevMajor
        currentDevMinor
        currentFileBeg
        % Constants
        scanBuffSize = 2^17;     % preallocation size of scanner (expected max. number of files in tar)
        extrBuffSize = 4194304;  % buffer size of extracter (bytes)
        blockSize    = 512;      % ustar block size (do not change)
    end

    %% ------------------------------------------------------------------------%%
    %% CONSTRUCTORS/DESTRUCTORS                                                %%
    %% ------------------------------------------------------------------------%%
    methods(Access=public)
        function obj = ustar()
            % obj = ustar()
            % Default constructor
            obj.fileID = -1;
            obj.resetPublicProperties();
            obj.resetPrivateProperties();
            obj.resetCurrent();
        end

        function delete(obj)
            % obj.delete()
            % Default destructor; closes the archive if one is open.
            obj.close();
        end
    end

    %% ------------------------------------------------------------------------%%
    %% INITIALIZATION METHODS                                                  %%
    %% ------------------------------------------------------------------------%%
    methods(Access=public)
        function open(obj,file)
            % obj.open(file)
            % Opens a file in read-only mode and scans all headers.
            % Input
            %   file    path to TAR file

            % Release a previously opened archive so its handle is not leaked
            obj.close();

            obj.File   = file;
            obj.IOMode = 'read';
            obj.ioflag = 'r';
            obj.fileID = fopen(obj.File,obj.ioflag);
            if obj.fileID<0
                error('Unable to open file: %s',obj.File);
            end
            obj.scanArchive();
            obj.resetCurrent();
        end

        function openIndexed(obj,tarfile,indexfile)
            % obj.openIndexed(tarfile,indexfile)
            % Opens a file in read-only mode while using available
            % indexing data instead of scanning the archive.
            % Input
            %   tarfile     path to TAR file
            %   indexfile   path to index file; the format is selected by
            %               its extension ('.simplejson', '.msgpack' or
            %               '.taridx')

            % Release a previously opened archive so its handle is not leaked
            obj.close();

            obj.File      = tarfile;
            obj.IndexFile = indexfile;
            obj.IOMode    = 'read';
            obj.ioflag    = 'r';
            obj.fileID    = fopen(obj.File,obj.ioflag);
            if obj.fileID<0
                error('Unable to open file: %s',obj.File);
            end
            obj.scanIndexFile();
            obj.resetCurrent();
        end
    end

    %% ------------------------------------------------------------------------%%
    %% PUBLIC METHODS                                                          %%
    %% ------------------------------------------------------------------------%%
    methods(Access=public)
        function close(obj)
            % obj.close()
            % Closes the archive and resets all properties. Does nothing
            % when no archive is open.
            if isempty(obj.fileID) || obj.fileID<0
                return;
            end
            status = fclose(obj.fileID);
            if status<0
                warning('Unable to close file (exit code: %d)',status);
                return;
            end
            obj.resetPublicProperties();
            obj.resetPrivateProperties();
            obj.resetCurrent();
            obj.fileID = -1;
        end

        function [ptr] = pointer(obj,fname)
            % [ptr] = obj.pointer(fname)
            % Returns a 'pointer' to the requested file within the tar-ball
            % which can be used to read the data without extracting.
            % Input
            %   fname   file name of subfile within tar-ball
            % Output
            %   ptr     pointer: [fid,first byte,number of bytes]
            idx = obj.findSubfile(fname);
            ptr = [obj.fileID,obj.subFileBeg(idx),obj.subFileSize(idx)];
        end

        function [fname,fsize] = list(obj)
            % [fname,fsize] = obj.list()
            % Returns a list of name/size of all subfiles within the tar-ball
            % Output
            %   fname   cell array with filenames
            %   fsize   array with file sizes in bytes
            fname = obj.subFile;
            fsize = obj.subFileSize;
        end

        function extract(obj,fname,varargin)
            % obj.extract(fname)
            % obj.extract(fname,'outfile',outfile)
            % Extracts the requested subfile to a standalone file.
            % Input
            %   fname     name of subfile
            %   outfile   (optional name/value pair) path of output file
            %             (default: fname)
            par = inputParser;
            addParamValue(par,'outfile',fname,@ischar);
            parse(par,varargin{:});
            outfile = par.Results.outfile;

            idx   = obj.findSubfile(fname);
            fbeg  = obj.subFileBeg(idx);
            fsize = obj.subFileSize(idx);

            % Position the archive first, so that a failure here does not
            % create or truncate the output file.
            if fseek(obj.fileID,fbeg,'bof')~=0
                error('Unable to seek to subfile: %s',fname);
            end

            fidw = fopen(outfile,'w');
            if fidw<0
                error('Unable to open file: %s',outfile);
            end
            % Close the output file on every exit path (including errors)
            closeOutput = onCleanup(@() fclose(fidw)); %#ok<NASGU>

            % Copy the data in chunks of at most extrBuffSize bytes
            remaining = fsize;
            while remaining>0
                nread = min(remaining,obj.extrBuffSize);
                [buff,count] = fread(obj.fileID,[1,nread],'*uint8');
                if count~=nread
                    error('Unexpected end of archive while extracting: %s',fname);
                end
                fwrite(fidw,buff);
                remaining = remaining-nread;
            end
        end

        function [flag] = isSubfile(obj,fname)
            % [flag] = obj.isSubfile(fname)
            % Checks if a subfile exists within tar-ball.
            % Input
            %   fname   name of subfile
            % Output
            %   flag    true if at least one subfile carries that name
            flag = any(ismember(obj.subFile,fname));
        end

        function writeIndex(obj,outfile)
            % obj.writeIndex(outfile)
            % Write a index file for tar archive in custom '.taridx' format
            % The format is:
            %   nsubfile            int64
            %   [nsubfile times]
            %     subFileName       256*char (NUL-terminated, blank-padded)
            %     subFileBeg        int64
            %     subFileSize       int64
            % Input
            %   outfile   name of index file to be written (with extension '.taridx')
            nameFieldLen = 256;

            % Validate all names before touching the output file: a name
            % must leave room for the terminating NUL and consist of
            % single-byte characters, otherwise the fixed-size record
            % layout would be corrupted.
            for ii=1:obj.NumberOfSubfiles
                entryName = obj.subFile{ii};
                if length(entryName)>=nameFieldLen || any(double(entryName)>255)
                    error('Subfile name cannot be stored in index file: %s',entryName);
                end
            end

            fid = fopen(outfile,'wb');
            if fid<0
                error('Unable to open file: %s',outfile);
            end
            closeIndex = onCleanup(@() fclose(fid)); %#ok<NASGU>

            fwrite(fid,obj.NumberOfSubfiles,'int64');
            for ii=1:obj.NumberOfSubfiles
                nchar   = length(obj.subFile{ii});
                subfile = blanks(nameFieldLen);
                subfile(1:nchar) = obj.subFile{ii};
                subfile(nchar+1) = 0;
                % Written as raw bytes so the record is exactly 256 bytes
                % regardless of the encoding associated with the file
                fwrite(fid,uint8(subfile),'uint8');
                fwrite(fid,obj.subFileBeg(ii),'int64');
                fwrite(fid,obj.subFileSize(ii),'int64');
            end
        end

        function [fname,foffset,fsize,nfile] = dumpIndex(obj)
            % [fname,foffset,fsize,nfile] = obj.dumpIndex()
            % Get indexing data of tarfile
            % Output
            %   fname     cell array of file names
            %   foffset   data offset within tar file
            %   fsize     data size
            %   nfile     number of files in archive
            nfile   = obj.NumberOfSubfiles;
            fname   = obj.subFile;
            foffset = obj.subFileBeg;
            fsize   = obj.subFileSize;
        end
    end

    %% ------------------------------------------------------------------------%%
    %% PRIVATE METHODS                                                         %%
    %% ------------------------------------------------------------------------%%
    methods(Access=private)
        function scanArchive(obj)
            % obj.scanArchive()
            % Scans the tar-ball for subfiles and stores meta-data in class variables.
            obj.subFile     = cell(obj.scanBuffSize,1);
            obj.subFileBeg  = zeros(obj.scanBuffSize,1);
            obj.subFileSize = zeros(obj.scanBuffSize,1);

            % Jump to start of file
            fseek(obj.fileID,0,'bof');

            % Loop over (unknown) number of subfiles and evaluate header
            ii = 0;
            while ~obj.checkEOF()
                ii = ii+1;
                obj.readHeader(true);
                obj.subFile{ii}     = obj.currentFile;
                obj.subFileSize(ii) = obj.currentFileSize;
                obj.subFileBeg(ii)  = obj.currentFileBeg;

                % Skip the data, which is padded to full blocks
                nblock = ceil(obj.currentFileSize/obj.blockSize);
                if fseek(obj.fileID,nblock*obj.blockSize,'cof')~=0
                    % The skip failed, i.e. the archive ends early. Move to
                    % the end of the file so the scan terminates, and report
                    % if even the data itself is incomplete (missing padding
                    % alone is tolerated).
                    fseek(obj.fileID,0,'eof');
                    if ftell(obj.fileID)<obj.currentFileBeg+obj.currentFileSize
                        warning('Archive is truncated within subfile: %s',obj.currentFile);
                    end
                end
            end

            % Truncate preallocated arrays
            obj.NumberOfSubfiles = ii;
            obj.subFile          = obj.subFile(1:ii);
            obj.subFileSize      = obj.subFileSize(1:ii);
            obj.subFileBeg       = obj.subFileBeg(1:ii);

            % Arrays grow automatically beyond the preallocation; the
            % warning only hints that scanBuffSize is chosen too small.
            if obj.NumberOfSubfiles>obj.scanBuffSize
                warning('Number of subfiles exceeds scanBuffSize.');
            end
            obj.resetCurrent();
        end

        function scanIndexFile(obj)
            % obj.scanIndexFile()
            % Reads tar meta-data from index file into class variables.

            % Check encoding of indexing file
            [~,~,fileExtension] = fileparts(obj.IndexFile);
            switch fileExtension
                case '.simplejson'
                    % Open and read file contents (ASCII)
                    indexfileID = fopen(obj.IndexFile,'r');
                    if indexfileID<0
                        error('Unable to open file: %s',obj.IndexFile);
                    end
                    fseek(indexfileID,0,'bof');
                    jsonstr = fread(indexfileID,'schar=>char')';
                    fclose(indexfileID);

                    % Parse JSON and reconstruct filenames
                    if ~isempty(which('jsonlab.loadjson'))
                        % User function from matlab central
                        % This function is preferred, since filenames can be
                        % reconstructed safely from parsed JSON (. <=> _0x2E_)
                        json        = jsonlab.loadjson(jsonstr);
                        jsonFields  = fieldnames(json);
                        tarFileName = strrep(jsonFields,'_0x2E_','.');
                    elseif ~isempty(which('jsondecode'))
                        % Built-in function
                        % Second choice only, since filename might be ambiguous
                        % if it has no extension, but contains underscore. (. => _)
                        json        = jsondecode(jsonstr);
                        jsonFields  = fieldnames(json);
                        idxtmp      = strfind(jsonFields,'_');
                        tarFileName = jsonFields;
                        for ifield=1:length(jsonFields)
                            if ~isempty(idxtmp{ifield})
                                % Treat the last underscore as the extension dot
                                tarFileName{ifield}(idxtmp{ifield}(end)) = '.';
                            end
                        end
                    else
                        % no JSON decoder available
                        error('No JSON decoder available.');
                    end

                    % Extract important fields (preallocated so that the
                    % variables exist even for an empty index)
                    nsubfile      = length(jsonFields);
                    tarFileOffset = zeros(1,nsubfile);
                    tarFileSize   = zeros(1,nsubfile);
                    for isub=1:nsubfile
                        tarFileOffset(isub) = json.(jsonFields{isub}).offset;
                        tarFileSize(isub)   = json.(jsonFields{isub}).size;
                    end

                case '.msgpack'
                    % Open and read file contents (binary)
                    indexfileID = fopen(obj.IndexFile,'rb');
                    if indexfileID<0
                        error('Unable to open file: %s',obj.IndexFile);
                    end
                    fseek(indexfileID,0,'bof');
                    msgbytes = fread(indexfileID,'uint8=>uint8');
                    fclose(indexfileID);

                    % Parse msgpack
                    if ~isempty(which('msgpack.parsemsgpack'))
                        msg           = msgpack.parsemsgpack(msgbytes);
                        tarFileName   = msg.keys;
                        nsubfile      = length(tarFileName);
                        tarFileSize   = zeros(1,nsubfile);
                        tarFileOffset = zeros(1,nsubfile);
                        for isub=1:nsubfile
                            % Each value holds {offset,size}
                            tmp = msg(tarFileName{isub});
                            tarFileOffset(isub) = double(tmp{1});
                            tarFileSize(isub)   = double(tmp{2});
                        end
                    else
                        % no msgpack decoder available
                        error('No msgpack decoder available.');
                    end

                case '.taridx'
                    % Open and read file contents (binary)
                    indexfileID = fopen(obj.IndexFile,'rb');
                    if indexfileID<0
                        error('Unable to open file: %s',obj.IndexFile);
                    end
                    % Close the index file on every exit path
                    closeIndex = onCleanup(@() fclose(indexfileID)); %#ok<NASGU>
                    fseek(indexfileID,0,'bof');
                    nsubfile = fread(indexfileID,1,'int64=>double');
                    if isempty(nsubfile) || nsubfile<0
                        error('Invalid index file: %s',obj.IndexFile);
                    end
                    tarFileName   = cell(1,nsubfile);
                    tarFileSize   = zeros(1,nsubfile);
                    tarFileOffset = zeros(1,nsubfile);
                    for isub=1:nsubfile
                        % deblank strips the NUL terminator and blank padding
                        tarFileName{isub}   = deblank(fread(indexfileID,[1,256],'uint8=>char'));
                        tarFileOffset(isub) = fread(indexfileID,1,'int64=>double');
                        tarFileSize(isub)   = fread(indexfileID,1,'int64=>double');
                    end

                otherwise
                    error('Unknown file extension of index file: %s',fileExtension);
            end

            % Order by offset, i.e. file order within tarball and assign
            % to class variables (always as column vectors)
            [~,idxsort] = sort(tarFileOffset);
            obj.subFile          = reshape(tarFileName(idxsort),[],1);
            obj.subFileBeg       = reshape(tarFileOffset(idxsort),[],1);
            obj.subFileSize      = reshape(tarFileSize(idxsort),[],1);
            obj.NumberOfSubfiles = nsubfile;
        end

        function readHeader(obj,scanMode)
            % obj.readHeader(scanMode)
            % Reads header data of a subfile in tar-ball and stores information
            % in 'current*' class-variables. On return the file position is
            % at the first data byte of the subfile.
            % Input
            %   scanMode  when set to true, omit parts which are not needed during scan

            % Bytes are read unsigned: the header checksum is defined as the
            % sum of unsigned byte values, and bytes >= 128 (e.g. in
            % non-ASCII names) must be preserved.
            header = fread(obj.fileID,[1,obj.blockSize],'uint8=>char');
            if length(header)<obj.blockSize
                error('Unexpected end of archive while reading header.');
            end

            % Extract header information
            name     = header(1:100);
            mode     = header(101:108);
            uid      = header(109:116);
            gid      = header(117:124);
            fsize    = header(125:136);
            mtime    = header(137:148);
            chksum   = header(149:156);
            link     = header(157);
            linkname = header(158:257);
            magic    = header(258:263);
            uname    = header(266:297);
            gname    = header(298:329);
            devmajor = header(330:337);
            devminor = header(338:345);
            prefix   = header(346:500);

            % Evaluate checksum
            chksum1 = ustar.computeChecksum(header);
            chksum2 = ustar.parseOctalStr(chksum);
            if chksum1~=chksum2
                error('Checksum mismatch! %d,%d',chksum1,chksum2);
            end

            % Evaluate magic (substring match accepts both the POSIX
            % variant 'ustar<NUL>' and the variant padded with blanks)
            if isempty(strfind(ustar.parseStr(magic),'ustar'))
                error(' Not a UNIX standard tar file.')
            end

            % Assemble the path name. Only headers with the POSIX magic
            % ('ustar' followed by NUL) carry a prefix field; it is joined
            % to the name with a slash. For other variants the bytes at
            % that position are not a path prefix and are ignored.
            fileName = ustar.parseStr(name);
            if double(magic(6))==0
                filePrefix = ustar.parseStr(prefix);
                if ~isempty(filePrefix)
                    fileName = [filePrefix,'/',fileName];
                end
            end

            % Parse header information
            obj.currentFile     = fileName;
            obj.currentFileBeg  = ftell(obj.fileID);
            obj.currentFileSize = ustar.parseOctalStr(fsize);
            if ~scanMode
                obj.currentMode      = ustar.parseStr(mode);
                obj.currentUID       = ustar.parseOctalStr(uid);
                obj.currentGID       = ustar.parseOctalStr(gid);
                % mtime is in seconds since 1970-01-01
                obj.currentModtime   = datestr(ustar.parseOctalStr(mtime)/86400+datenum(1970,1,1));
                obj.currentLink      = ustar.parseOctalStr(link);
                obj.currentLinkname  = ustar.parseStr(linkname);
                obj.currentUsername  = ustar.parseStr(uname);
                obj.currentGroupname = ustar.parseStr(gname);
                obj.currentDevMajor  = ustar.parseOctalStr(devmajor);
                obj.currentDevMinor  = ustar.parseOctalStr(devminor);
            end
        end

        function [isEOF] = checkEOF(obj)
            % [isEOF] = obj.checkEOF()
            % Checks if end-of-file is reached (two blocks of binary zeros).
            % A single zero block directly followed by the physical end of
            % the file is accepted as end-of-archive as well. If the end is
            % not reached, the file position is restored.
            % Output
            %   isEOF   flag which indicates end-of-file
            isEOF       = false;
            curPosition = ftell(obj.fileID);
            blockref    = zeros(1,obj.blockSize,'int8');

            blockcur = fread(obj.fileID,[1,obj.blockSize],'int8=>int8');
            if feof(obj.fileID)
                isEOF = true;
            end
            if isequal(blockcur,blockref)
                blockcur = fread(obj.fileID,[1,obj.blockSize],'int8=>int8');
                hitFileEnd = feof(obj.fileID) && ~any(blockcur);
                if isequal(blockcur,blockref) || hitFileEnd
                    isEOF = true;
                    return;
                end
            end
            fseek(obj.fileID,curPosition,'bof');
        end

        function [idx] = findSubfile(obj,fname)
            % [idx] = obj.findSubfile(fname)
            % Get index of requested subfile. If the name occurs more than
            % once, a warning is issued and the last occurrence is returned
            % (the entry that wins when an archive is unpacked in order).
            % Input
            %   fname   name of subfile
            % Output
            %   idx     (scalar) index of subfile
            isReqFile = ismember(obj.subFile,fname);
            nmatch    = sum(isReqFile);
            if nmatch==0
                error('File not found: %s',fname);
            elseif nmatch>1
                warning('More than one matching file found.');
            end
            idx = find(isReqFile,1,'last');
        end

        function resetPublicProperties(obj)
            % Clears all public properties.
            obj.File             = [];
            obj.IndexFile        = [];
            obj.IOMode           = [];
            obj.NumberOfSubfiles = [];
        end

        function resetPrivateProperties(obj)
            % Clears the subfile table and the I/O flag. The file
            % identifier itself is managed by open/close.
            obj.ioflag      = [];
            obj.subFile     = [];
            obj.subFileBeg  = [];
            obj.subFileSize = [];
        end

        function resetCurrent(obj)
            % Clears the information of the most recently read header.
            obj.currentFile      = [];
            obj.currentMode      = [];
            obj.currentUID       = [];
            obj.currentGID       = [];
            obj.currentFileSize  = [];
            obj.currentModtime   = [];
            obj.currentLink      = [];
            obj.currentLinkname  = [];
            obj.currentUsername  = [];
            obj.currentGroupname = [];
            obj.currentDevMajor  = [];
            obj.currentDevMinor  = [];
            obj.currentFileBeg   = [];
        end
    end

    %% ------------------------------------------------------------------------%%
    %% PRIVATE STATIC METHODS                                                  %%
    %% ------------------------------------------------------------------------%%
    methods(Access=private,Static)
        function [chksum] = computeChecksum(block)
            % Sum of all header bytes, where the checksum field itself
            % (bytes 149-156) is counted as blanks.
            block(149:156) = ' ';
            chksum = sum(double(block));
        end

        function [str] = parseStr(str)
            % Removes all NUL characters from a header field.
            charZero = cast(0,'char');
            str = strrep(str,charZero,'');
        end

        function [num] = parseOctalStr(str)
            % Converts an octal header field to a number.
            % NULs and surrounding blanks are ignored. An empty field
            % yields 0; a field containing anything but octal digits
            % yields NaN.
            txt = strtrim(ustar.parseStr(str));
            if isempty(txt)
                num = 0;
            elseif all(txt>='0' & txt<='7')
                num = ustar.oct2dec_long(str2double(txt));
            else
                num = NaN;
            end
        end

        function [dec] = oct2dec_long(oct)
            % Interprets the decimal digits of 'oct' as octal digits and
            % returns the corresponding value. Non-finite or negative input
            % yields NaN (the digit loop would not terminate for it).
            if ~isfinite(oct) || oct<0
                dec = NaN;
                return;
            end
            dec = 0;
            ii  = 1;
            while floor(oct/10^(ii-1))~=0
                cbase = 8^(ii-1);
                cfact = floor(mod(oct,10^ii)/10^(ii-1));
                dec   = dec + cfact*cbase;
                ii    = ii+1;
            end
        end
    end
end