function [stroked,extrad,basetime] = readcpd(fname)
% [stroked,extrad,basetime] = readcpd(fname)
%
% Read a segmented binary pen-data file and split it into stroke data and
% auxiliary ("extra") segments.
%
% Input:
%   fname    - name of the file to read (opened big-endian, read as bytes)
%
% Outputs:
%   stroked  - cell array indexed by page number; stroked{p} is an N-by-3
%              cell with columns {decoded points, relative time, segment code}
%              for every segment with code < 3 seen while page p was current
%   extrad   - M-by-4 cell with columns {data, relative time, segment code,
%              current page number} for every segment with code >= 3
%   basetime - timestamp (in seconds, see convtime) taken from bytes 6..11,
%              i.e. the timestamp field of the first segment header
%
% Progress is logged to stdout, one line per segment.

more off;
global hf;
global hv;
hufftable;

fid = fopen(fname,'r','b');
if (fid < 0)
  error('readcpd:fileOpen','readcpd: cannot open file "%s"',fname);
end
[rawdd,rawcc] = fread(fid,'uint8');
% everything is in memory now; close right away so that a parse error
% further down cannot leak the file handle
fclose(fid);

if (rawcc < 11)
  error('readcpd:tooShort', ...
        'readcpd: file "%s" has only %d bytes, need at least 11',fname,rawcc);
end

ptr=1;
pageno=NaN;
extrano=1;
stroked=cell(0,0);
extrad=cell(0,4);
basetime=convtime(rawdd(6:11));

% every segment starts with an 11 byte header; stop when the remaining
% bytes cannot hold one (the final message reports how far we got)
while(ptr+10<=rawcc)
  [reltime,segcode,dd,ptr] = getsegment(ptr,rawdd,basetime);

  if(segcode==4)
    % page number segment: switch current page, creating it if needed
    pageno=dd;
    if(pageno>length(stroked))
      stroked{pageno}=cell(0,3);
    end
  end

  if(segcode>=3)
    extrad{extrano,1}=dd;
    extrad{extrano,2}=reltime;
    extrad{extrano,3}=segcode;
    extrad{extrano,4}=pageno;
    extrano=extrano+1;
  else
    if(isnan(pageno) || pageno<1)
      error('readcpd:noPage', ...
            'readcpd: stroke segment (code %d) found before a valid page number segment', ...
            segcode);
    end
    stroked{pageno}{end+1,1}=dd;
    stroked{pageno}{end,2}=reltime;
    stroked{pageno}{end,3}=segcode;
  end
end

fprintf(1,'Done! (Read %d out of %d bytes ok.)\n',ptr-1,rawcc);

% -----------------------------------------------------------------
% function to get segments
function [reltime,segcode,dd,newptr] = getsegment(ptr,rawdd,prevtime)
% Parse one segment starting at rawdd(ptr).
%
% Header layout as read here: 4 bytes (skipped), 1 byte segment code,
% 6 bytes timestamp.  The payload length depends on the segment code.
%
% Returns the timestamp relative to prevtime, the segment code, the
% payload (decoded for codes 1, 2 and 4, raw bytes otherwise, [] when the
% segment has no payload or is unknown) and the index of the next segment.

if(ptr+10>numel(rawdd))
  error('readcpd:truncated', ...
        'readcpd: segment header at byte %d is truncated',ptr);
end

pp=ptr+4;   % skip the 4 leading header bytes (not used)
segcode=rawdd(pp);
pp=pp+1;
reltime = convtime(rawdd(pp:(pp+5)))-prevtime;
pp=pp+6;

%fprintf(1,'Segment [%6d] [%2d] ',reltime,segcode);
fprintf(1,'Segment [%6d] ',reltime);
dd=[];

switch(segcode)
  case 1      % 0x01
    fprintf(1,'(huffman data: ');
    [dd,pp,npts]=takehuffman(rawdd,pp);
    fprintf(1,'%d points)\n',npts);
  case 2      % 0x02
    fprintf(1,'(huffman data*: ');
    [dd,pp,npts]=takehuffman(rawdd,pp);
    fprintf(1,'%d points)\n',npts);
  case 3      % 0x03
    fprintf(1,'(keyword selection)\n');
    [dd,pp]=takebytes(rawdd,pp,5);
  case 4      % 0x04
    [dd,pp]=takebytes(rawdd,pp,4);
    dd = 2^24*dd(1)+2^16*dd(2)+2^8*dd(3)+dd(4);
    fprintf(1,'(page number %d)\n',dd);
  case 5      % 0x05
    fprintf(1,'(clock adjust)\n');
    [dd,pp]=takebytes(rawdd,pp,6);
  case 6      % 0x06
    fprintf(1,'(misc data (original CP))\n');
    [dd,pp]=takebytes(rawdd,pp,14);
  case 10     % 0x0a
    [dd,pp]=takebytes(rawdd,pp,8);
    dd=char(dd);
    fprintf(1,'(filename "%s")\n',dd);
  case 13     % 0x0d
    [dd,pp]=takebytes(rawdd,pp,8);
    dd=char(dd);
    fprintf(1,'(title "%s")\n',dd);
  case 14     % 0x0e
    fprintf(1,'(mystery1)\n');
    [dd,pp]=takebytes(rawdd,pp,1);
  case 29     % 0x1d
    fprintf(1,'(misc data (CP XP))\n');
    [dd,pp]=takebytes(rawdd,pp,37);
  case 53     % 0x35
    fprintf(1,'(mystery2 (CP XP))\n');
  case {54,55}  % 0x36, 0x37
    % I think for CP XP this is also bookmark
    % (MATLAB switch has no fall-through, so both codes are listed in
    %  one case; otherwise 0x36 would print nothing at all)
    fprintf(1,'(bookmark)\n');
  case 57     % 0x39
    fprintf(1,'(mystery3 (SDK))\n');
    [dd,pp]=takebytes(rawdd,pp,33);
  case 58     % 0x3a
    fprintf(1,'(last download timestamp)\n');
  case 60     % 0x3c
    fprintf(1,'(mystery4 (SDK))\n');
    [dd,pp]=takebytes(rawdd,pp,12);
  case 61     % 0x3d
    fprintf(1,'(mystery5 (SDK))\n');
    [dd,pp]=takebytes(rawdd,pp,10);
  case 62     % 0x3e
    fprintf(1,'(mystery6 (SDK))\n');
    [dd,pp]=takebytes(rawdd,pp,4);
  otherwise
    fprintf(1,'(ERROR -- unknown segment code %d)\n',segcode)
end
newptr=pp;

% -----------------------------------------------------------------
% helper: bounds-checked read of n payload bytes starting at rawdd(pp)
function [chunk,newpp] = takebytes(rawdd,pp,n)
if(pp+n-1>numel(rawdd))
  error('readcpd:truncated', ...
        'readcpd: segment payload at byte %d is truncated (need %d bytes, %d available)', ...
        pp,n,numel(rawdd)-pp+1);
end
chunk=rawdd(pp:(pp+n-1));
newpp=pp+n;

% -----------------------------------------------------------------
% helper: read a 2 byte big-endian length, then decode that many bytes of
% huffman data.  npts is the 16 bit value stored in the last two bytes
% consumed, which is what the log line reports as the point count.
function [pts,newpp,npts] = takehuffman(rawdd,pp)
[lenbytes,pp]=takebytes(rawdd,pp,2);
ll=256*lenbytes(1)+lenbytes(2);
[blk,pp]=takebytes(rawdd,pp,ll);
pts=huffdecode(blk);
npts=256*rawdd(pp-2)+rawdd(pp-1);
newpp=pp;

% -----------------------------------------------------------------
% function to parse arcane (and non Y2K compliant!) date formatting
function [secs] = convtime(tbytes)
% Convert a 6 byte timestamp to seconds (datenum days * 86400, rounded).
%
% Each byte is rendered with dec2hex, i.e. its hex digits are read as
% decimal digits (BCD-style), and the pieces are assembled into the
% string 'a-b-19cc d:e:f' which datenum then parses.
%
% The year byte is padded to two digits: without the padding a year byte
% below 0x10 would give a three digit year such as '195'.
% NOTE(review): the century is hard-coded to '19', so timestamps from
% 2000 onwards are still mapped into the 1900s -- confirm desired handling.
secs = round(24*60*60*datenum([dec2hex(tbytes(1)),'-', ...
                               dec2hex(tbytes(2)),'-', ...
                               '19',dec2hex(tbytes(3),2),' ', ...
                               dec2hex(tbytes(4)),':', ...
                               dec2hex(tbytes(5)),':', ...
                               dec2hex(tbytes(6))] ));

% -----------------------------------------------------------------
% function to unfold the huffmann encoded data
function [pts] = huffdecode(dd)
% Decode one block of huffman encoded stroke data.
%
% dd(1:4) holds the absolute start point as two big-endian 16 bit values.
% The remaining bytes are a bit stream of codes from the global table hf;
% decoded values (hv) come in pairs and each pair is a delta added
% to the previous point.  Code 35 means "the next 8 bits are a literal
% signed byte", code 36 terminates the stream.
%
% Returns pts, a 2-by-N matrix of points.  On malformed or truncated
% input an error marker is printed and the points decoded so far are
% returned.
global hf;
global hv;

if(numel(dd)<4)
  % not even a start point available
  fprintf(1,'---ERROR in huffman decoding---');
  pts=zeros(2,0);
  return;
end

pts=[256*dd(1)+dd(2);256*dd(3)+dd(4)];
%pts=pts-(pts>=2^15).*[2^16;2^16];
ptr=5;

bitstr='';
nextbyte='';
bitpos=9;      % forces the first byte of the bit stream to be loaded below
terminate=0;
mm=[];
deltax=NaN;    % NaN means "no pending x delta"

while(~terminate)
  % accumulate bits until they match a code (longest code is 15 bits)
  while(isempty(mm) && (length(bitstr)<=15))
    if(bitpos>8)
      if(ptr>numel(dd))
        break;   % ran out of data; handled as a decoding error below
      end
      nextbyte=dec2bin(dd(ptr),8);
      ptr=ptr+1;
      bitpos=1;
    end
    bitstr=[bitstr,nextbyte(bitpos)];
    bitpos=bitpos+1;
    mm=find(strcmp(bitstr,hf));
  end

  if(isempty(mm))
    fprintf(1,'---ERROR in huffman decoding---');
    terminate=1;
  elseif(mm==36)
    % fprintf(1,'Found huffmann terminator\n');
    terminate=1;
  elseif(mm==35)
    % fprintf(1,'huffmann special byte\n');
    if(ptr>numel(dd))
      % the literal byte would extend past the end of the data
      fprintf(1,'---ERROR in huffman decoding---');
      terminate=1;
    else
      % the literal byte straddles the current and the next input byte;
      % bitpos is left unchanged so decoding resumes right after it
      thebytestr=nextbyte(bitpos:8);
      nextbyte=dec2bin(dd(ptr),8);
      ptr=ptr+1;
      thebytestr=[thebytestr,nextbyte(1:(bitpos-1))];
      thebyte=bin2dec(thebytestr);
      if(thebyte>127)
        thebyte=thebyte-256;
      end
      % thebyte
      if(isnan(deltax))
        deltax=thebyte;
      else
        pts=[pts,pts(:,end)+[deltax;thebyte]];
        deltax=NaN;
      end
    end
  else
    % fprintf(1,'At pos %d found relative %d \n',ptr,hv{mm});
    if(isnan(deltax))
      deltax=hv{mm};
    else
      pts=[pts,pts(:,end)+[deltax;hv{mm}]];
      deltax=NaN;
    end
  end
  bitstr='';
  mm=[];
end

% at this point, length(dd)-ptr should be 2
%
% dd(ptr) should be 255
% 256*dd(ptr+1)+dd(ptr+2) should be # of points decoded

%256*dd(ptr+1)+dd(ptr+2)
%size(pts)

% -----------------------------------------------------------------
% function to fill the global huffman code tables
function [tbl] = hufftable
% Fill the global tables hf (code bit strings) and hv (decoded values)
% used by huffdecode, and return the code table.
% The output has its own name because a variable cannot be both an output
% argument and a global.
global hf;
global hv;

hf=cell(36,1);
hv=cell(1,36);   % reset explicitly instead of relying on the global's old state

hf{1}=  '110101011111001';  hv{1}=-16;
hf{2}=  '110101011110';     hv{2}=-15;
hf{3}=  '1100100100';       hv{3}=-14;
hf{4}=  '110010011';        hv{4}=-13;
hf{5}=  '110101010';        hv{5}=-12;
hf{6}=  '11010100';         hv{6}=-11;
hf{7}=  '1000100';          hv{7}=-10;
hf{8}=  '1010011';          hv{8}=-9;
hf{9}=  '1101011';          hv{9}=-8;
hf{10}= '101000';           hv{10}=-7;
hf{11}= '110011';           hv{11}=-6;
hf{12}= '10000';            hv{12}=-5;
hf{13}= '11000';            hv{13}=-4;
hf{14}= '11011';            hv{14}=-3;
hf{15}= '1011';             hv{15}=-2;
hf{16}= '010';              hv{16}=-1;
hf{17}= '00';               hv{17}=0;
hf{18}= '011';              hv{18}=1;
hf{19}= '1001';             hv{19}=2;
hf{20}= '10101';            hv{20}=3;
hf{21}= '110100';           hv{21}=4;
hf{22}= '100011';           hv{22}=5;
hf{23}= '1100101';          hv{23}=6;
hf{24}= '1010010';          hv{24}=7;
hf{25}= '11001000';         hv{25}=8;
hf{26}= '10001010';         hv{26}=9;
hf{27}= '100010111';        hv{27}=10;
hf{28}= '100010110';        hv{28}=11;
hf{29}= '1100100101';       hv{29}=12;
hf{30}= '11010101110';      hv{30}=13;
hf{31}= '11010101100';      hv{31}=14;
hf{32}= '11010101101';      hv{32}=15;
hf{33}= '1101010111111';    hv{33}=16;
% NOTE(review): the value jumps from 16 to 18 here -- confirm it is not
% meant to be 17
hf{34}= '11010101111101';   hv{34}=18;
hf{35}= '110101011111000';  hv{35}=NaN;
% '110101011111000xxxxxxxx' means "xxxxxxxx" in signed byte
% '11010101111100000000000' means 0
hf{36}= '11111111';         hv{36}=NaN; % termination signal
% NOTE: missing values at very end are to be filled in, but we don't
% know how -- zero padding?

tbl=hf;