ucftools/matlab/@ustar/ustar.m

306 lines
10 KiB
Matlab

classdef ustar < handle
    % Low-level utilities for UNIX standard tar (ustar) files.
    %
    % The class opens a tar-ball read-only, scans all member headers once and
    % afterwards offers lookup (pointer), listing (list) and extraction
    % (extract) of individual members.
    %
    % Typical use:
    %   t = ustar();
    %   t.open('archive.tar');
    %   [names,sizes] = t.list();
    %   t.extract(names{1});
    %   t.close();
    properties (Access = public)
        File             % file name
        IOMode           % file opened in read-only or read-write mode?
        NumberOfSubfiles % number of subfiles
    end
    properties (Access = private)
        % File info
        fileID = -1      % MATLAB file identifier; negative while no archive is open
        ioflag
        subFile
        subFileBeg
        subFileSize
        % Current subfile information
        currentFile
        currentMode
        currentUID
        currentGID
        currentFileSize
        currentModtime
        currentLink
        currentLinkname
        currentUsername
        currentGroupname
        currentDevMajor
        currentDevMinor
        currentFileBeg
        % Constants
        scanBuffSize = 2^17;    % preallocated number of subfile slots used by the scanner
        extrBuffSize = 4194304; % buffer size of extracter (bytes per chunk)
        blockSize    = 512;     % ustar block size (do not change)
    end
    %% ------------------------------------------------------------------------%%
    %% CONSTRUCTORS/DESTRUCTORS                                                %%
    %% ------------------------------------------------------------------------%%
    methods(Access=public)
        function obj = ustar()
            % obj = ustar()
            % Default constructor. Creates an object with no archive attached.
            obj.resetPublicProperties();
            obj.resetPrivateProperties();
            obj.resetCurrent();
        end
        function delete(obj)
            % obj.delete()
            % Default destructor. Closes the archive if one is still open.
            obj.close();
        end
    end
    %% ------------------------------------------------------------------------%%
    %% INITIALIZATION METHODS                                                  %%
    %% ------------------------------------------------------------------------%%
    methods(Access=public)
        function open(obj,file)
            % obj.open(file)
            % Opens a file in read-only mode and scans it for subfiles.
            % Input
            %   file    name of the tar-ball
            % Raises an error if the file cannot be opened or if the scan
            % fails; in the latter case the file is closed again before the
            % error is rethrown.

            % Release an archive that is still attached, otherwise its
            % file identifier would be overwritten and leaked.
            if obj.isOpen()
                obj.close();
            end
            obj.File   = file;
            obj.IOMode = 'read';
            obj.ioflag = 'r';
            obj.fileID = fopen(obj.File,obj.ioflag);
            if obj.fileID<0
                error('Unable to open file: %s',obj.File);
            end
            try
                obj.scanArchive();
            catch err
                % Do not keep a half-scanned archive open
                obj.close();
                rethrow(err);
            end
            obj.resetCurrent();
        end
    end
    %% ------------------------------------------------------------------------%%
    %% PUBLIC METHODS                                                          %%
    %% ------------------------------------------------------------------------%%
    methods(Access=public)
        function close(obj)
            % obj.close()
            % Closes the file and resets all properties. Does nothing if no
            % file is open, so it is safe to call repeatedly.
            if ~obj.isOpen()
                return;
            end
            status = fclose(obj.fileID);
            if status<0
                warning('Unable to close file (exit code: %d)',status);
                return;
            end
            obj.resetPublicProperties();
            obj.resetPrivateProperties();
            obj.resetCurrent();
            obj.fileID = -1;
        end
        function [ptr] = pointer(obj,fname)
            % [ptr] = obj.pointer(fname)
            % Returns a 'pointer' to the requested file within the tar-ball
            % which can be used to read the data without extracting.
            % Input
            %   fname   file name of subfile within tar-ball
            % Output
            %   ptr     pointer: [fid,first byte,number of bytes]
            idx = obj.findSubfile(fname);
            ptr = [obj.fileID,obj.subFileBeg(idx),obj.subFileSize(idx)];
        end
        function [fname,fsize] = list(obj)
            % [fname,fsize] = obj.list()
            % Returns a list of name/size of all subfiles within the tar-ball
            % Output
            %   fname   cell array with filenames
            %   fsize   array with file sizes in bytes
            fname = obj.subFile;
            fsize = obj.subFileSize;
        end
        function extract(obj,fname)
            % obj.extract(fname)
            % Extracts the requested subfile to a standalone file. The output
            % file is named fname (relative to the current directory); any
            % directory contained in fname must already exist.
            % Input
            %   fname   name of subfile
            idx       = obj.findSubfile(fname);
            remaining = obj.subFileSize(idx);
            fidw = fopen(fname,'w');
            if fidw<0
                error('Unable to open file: %s',fname);
            end
            % Make sure the output file is closed even if an error occurs
            closeOutput = onCleanup(@() fclose(fidw)); %#ok<NASGU>
            if fseek(obj.fileID,obj.subFileBeg(idx),'bof')~=0
                error('Unable to seek to subfile: %s',fname);
            end
            % Copy the payload chunk by chunk. Raw bytes (uint8) are used so
            % that binary data is not altered by the file's character encoding.
            while remaining>0
                nread = min(remaining,obj.extrBuffSize);
                buff  = fread(obj.fileID,[1,nread],'uint8=>uint8');
                if numel(buff)~=nread
                    error('Unexpected end of archive while extracting: %s',fname);
                end
                fwrite(fidw,buff,'uint8');
                remaining = remaining-nread;
            end
        end
    end
    %% ------------------------------------------------------------------------%%
    %% PRIVATE METHODS                                                         %%
    %% ------------------------------------------------------------------------%%
    methods(Access=private)
        function [tf] = isOpen(obj)
            % [tf] = obj.isOpen()
            % True if a valid (non-negative) file identifier is stored.
            tf = ~isempty(obj.fileID) && obj.fileID>=0;
        end
        function scanArchive(obj)
            % obj.scanArchive()
            % Scans the tar-ball for subfiles and stores meta-data in class variables.
            obj.subFile     = cell(obj.scanBuffSize,1);
            obj.subFileBeg  = zeros(obj.scanBuffSize,1);
            obj.subFileSize = zeros(obj.scanBuffSize,1);
            % Jump to start of file
            fseek(obj.fileID,0,'bof');
            % Loop over (unknown) number of subfiles and evaluate header
            ii = 0;
            while ~obj.checkEOF()
                ii = ii+1;
                obj.readHeader(true);
                obj.subFile{ii}     = obj.currentFile;
                obj.subFileSize(ii) = obj.currentFileSize;
                obj.subFileBeg(ii)  = obj.currentFileBeg;
                % Payload is padded to a whole number of blocks; skip it
                nblock = ceil(obj.currentFileSize/obj.blockSize);
                if fseek(obj.fileID,nblock*obj.blockSize,'cof')~=0
                    error('Archive is truncated within subfile: %s',obj.currentFile);
                end
            end
            % Truncate preallocated arrays
            obj.NumberOfSubfiles = ii;
            obj.subFile     = obj.subFile(1:ii);
            obj.subFileSize = obj.subFileSize(1:ii);
            obj.subFileBeg  = obj.subFileBeg(1:ii);
            % The arrays grow automatically beyond the preallocation; the
            % warning only indicates that scanning was slower than necessary.
            if obj.NumberOfSubfiles>obj.scanBuffSize
                warning('Number of subfiles exceeds scanBuffSize.');
            end
            obj.resetCurrent();
        end
        function readHeader(obj,scanMode)
            % obj.readHeader(scanMode)
            % Reads header data of a subfile in tar-ball and stores information
            % in 'current*' class-variables. On return the file position is
            % at the first payload byte of the subfile.
            % Input
            %   scanMode  when set to true, omit parts which are not needed during scan

            % Read raw bytes and map them 1:1 to characters so that field
            % offsets and the checksum do not depend on the file encoding.
            header = fread(obj.fileID,[1,obj.blockSize],'uint8=>char');
            if numel(header)<obj.blockSize
                error('Unexpected end of file while reading header.');
            end
            % Extract header information (the version field, bytes 264:265,
            % is not evaluated)
            name     = header(1:100);
            mode     = header(101:108);
            uid      = header(109:116);
            gid      = header(117:124);
            fsize    = header(125:136);
            mtime    = header(137:148);
            chksum   = header(149:156);
            link     = header(157);
            linkname = header(158:257);
            magic    = header(258:263);
            uname    = header(266:297);
            gname    = header(298:329);
            devmajor = header(330:337);
            devminor = header(338:345);
            prefix   = header(346:500);
            % Evaluate checksum
            chksum1 = ustar.computeChecksum(header);
            chksum2 = ustar.parseOctalStr(chksum);
            if chksum1~=chksum2
                error('Checksum mismatch! %d,%d',chksum1,chksum2);
            end
            % Evaluate magic
            if ~strcmp(ustar.parseStr(magic),'ustar')
                error(' Not a UNIX standard tar file.')
            end
            % Parse header information.
            % A non-empty prefix is joined to the name with a slash.
            nameStr   = ustar.parseStr(name);
            prefixStr = ustar.parseStr(prefix);
            if isempty(prefixStr)
                obj.currentFile = nameStr;
            else
                obj.currentFile = [prefixStr,'/',nameStr];
            end
            obj.currentFileBeg  = ftell(obj.fileID);
            obj.currentFileSize = ustar.parseOctalStr(fsize);
            if isnan(obj.currentFileSize)
                % e.g. size fields that are not plain octal text
                error('Invalid or unsupported size field for subfile: %s',obj.currentFile);
            end
            if ~scanMode
                obj.currentMode      = ustar.parseStr(mode);
                obj.currentUID       = ustar.parseOctalStr(uid);
                obj.currentGID       = ustar.parseOctalStr(gid);
                obj.currentModtime   = datestr(ustar.parseOctalStr(mtime)/86400+datenum(1970,1,1));
                obj.currentLink      = ustar.parseOctalStr(link); % NaN if the type flag is not an octal digit
                obj.currentLinkname  = ustar.parseStr(linkname);
                obj.currentUsername  = ustar.parseStr(uname);
                obj.currentGroupname = ustar.parseStr(gname);
                obj.currentDevMajor  = ustar.parseOctalStr(devmajor);
                obj.currentDevMinor  = ustar.parseOctalStr(devminor);
            end
        end
        function [isEOF] = checkEOF(obj)
            % [isEOF] = obj.checkEOF()
            % Checks if end-of-file is reached (two blocks of binary zeros).
            % A physical end of file (fewer bytes left than one block) and a
            % single zero block directly followed by the physical end of
            % file are treated as end-of-file as well. If no end-of-file is
            % detected, the file position is restored.
            % Output
            %   isEOF   flag which indicates end-of-file
            isEOF       = false;
            curPosition = ftell(obj.fileID);
            blockcur    = fread(obj.fileID,[1,obj.blockSize],'uint8=>uint8');
            if numel(blockcur)<obj.blockSize
                % Nothing left that could hold a header
                isEOF = true;
                return;
            end
            if ~any(blockcur)
                blocknext = fread(obj.fileID,[1,obj.blockSize],'uint8=>uint8');
                % ~any(...) is also true for an empty read, i.e. when the
                % second terminating block is missing
                if ~any(blocknext)
                    isEOF = true;
                    return;
                end
            end
            fseek(obj.fileID,curPosition,'bof');
        end
        function [idx] = findSubfile(obj,fname)
            % [idx] = obj.findSubfile(fname)
            % Get index of requested subfile. If the name occurs more than
            % once, a warning is issued and the last occurrence is returned,
            % so idx is always a scalar.
            % Input
            %   fname   name of subfile
            % Output
            %   idx     index of subfile
            isReqFile = strcmp(obj.subFile,fname);
            switch sum(isReqFile)
                case 0
                    error('File not found: %s',fname);
                case 1
                    % unique match, nothing to report
                otherwise
                    warning('More than one matching file found.');
            end
            idx = find(isReqFile,1,'last');
        end
        function resetPublicProperties(obj)
            % Clears the public properties.
            obj.File             = [];
            obj.IOMode           = [];
            obj.NumberOfSubfiles = [];
        end
        function resetPrivateProperties(obj)
            % Clears the archive tables (the file identifier is not touched).
            obj.ioflag      = [];
            obj.subFile     = [];
            obj.subFileBeg  = [];
            obj.subFileSize = [];
        end
        function resetCurrent(obj)
            % Clears all 'current*' variables filled by readHeader.
            obj.currentFile      = [];
            obj.currentMode      = [];
            obj.currentUID       = [];
            obj.currentGID       = [];
            obj.currentFileSize  = [];
            obj.currentModtime   = [];
            obj.currentLink      = [];
            obj.currentLinkname  = [];
            obj.currentUsername  = [];
            obj.currentGroupname = [];
            obj.currentDevMajor  = [];
            obj.currentDevMinor  = [];
            obj.currentFileBeg   = [];
        end
    end
    %% ------------------------------------------------------------------------%%
    %% PRIVATE STATIC METHODS                                                  %%
    %% ------------------------------------------------------------------------%%
    methods(Access=private,Static)
        function [chksum] = computeChecksum(block)
            % [chksum] = ustar.computeChecksum(block)
            % Sum of all character codes of a header block, with the
            % checksum field itself (bytes 149:156) replaced by spaces.
            block(149:156) = ' '; % checksum is computed with spaces in check sum field
            chksum = sum(double(block));
        end
        function [str] = parseStr(str)
            % [str] = ustar.parseStr(str)
            % Returns the content of a NUL-terminated header field, i.e.
            % everything before the first NUL character (the whole field if
            % it contains no NUL).
            iNull = find(str==char(0),1,'first');
            if ~isempty(iNull)
                str = str(1:iNull-1);
            end
        end
        function [num] = parseOctalStr(str)
            % [num] = ustar.parseOctalStr(str)
            % Converts a header field holding an octal number (text) into a
            % double. Padding with blanks/NULs is ignored.
            % Output
            %   num     value of the field; 0 for an empty field, NaN if the
            %           field contains anything but the digits 0-7
            digits = strtrim(ustar.parseStr(str));
            if isempty(digits)
                num = 0;
                return;
            end
            if any(digits<'0' | digits>'7')
                num = NaN;
                return;
            end
            num = ustar.oct2dec_long(str2double(digits));
        end
        function [dec] = oct2dec_long(oct)
            % [dec] = ustar.oct2dec_long(oct)
            % Interprets the decimal digits of the number 'oct' as octal
            % digits and returns the corresponding value (e.g. 644 -> 420).
            % Returns NaN for input that is not a finite, non-negative
            % integer (such input would otherwise never terminate the loop).
            if isempty(oct) || ~isfinite(oct) || oct<0 || oct~=floor(oct)
                dec = NaN;
                return;
            end
            dec    = 0;
            weight = 1; % 8^(position of current digit)
            while oct>0
                dec    = dec + mod(oct,10)*weight;
                oct    = floor(oct/10);
                weight = weight*8;
            end
        end
    end
end