
function alignment=myread_bowtie_block_tall(fname,block_size)
%MYREAD_BOWTIE_BLOCK_TALL Convert bowtie text output to a CSV and return it as a tall table.
%
%   alignment = myread_bowtie_block_tall(fname)
%   alignment = myread_bowtie_block_tall(fname,block_size)
%
%   The input file is read in blocks of block_size characters. From every
%   line that starts with three tab-separated word fields, those three
%   fields are extracted together with the value following 'NM:i:'. The
%   result is written to 'summary.csv' in the same folder as fname, with
%   the header QueryName,Flag,ReferenceName,NM, and returned as a tall
%   array backed by a tabularTextDatastore over that CSV.
%
%   Inputs
%     fname      - path of the bowtie result file to read.
%     block_size - (optional) number of characters read per block.
%                  Defaults to 10000000, which the original author found
%                  fastest on their machine (possibly hardware specific).
%
%   Output
%     alignment  - tall table with variables QueryName, Flag,
%                  ReferenceName and NM.
%
%   Notes
%     * Author's note: leaner than samread, roughly 20x faster and ~10x
%       less RAM (not re-measured here).
%     * NM values are converted through uint8 before being written.
%     * If the number of extracted leading-field rows and NM values differ
%       within a block, a warning is issued, the block text and the
%       remainder carried into it are dumped to errorblock<k>.txt and
%       error_remainder<k>.txt in the current folder, and that block is
%       NOT written to the CSV (the columns cannot be paired reliably).
%     * An existing summary.csv in the input folder is overwritten.

if ~exist('block_size','var') || isempty(block_size)
    block_size=10000000;
end

% The summary file is placed next to the input file.
output_dir=fileparts(fname);
fname_sum=fullfile(output_dir,'summary.csv');

% All file handles are opened and released inside the helper, so the CSV
% is guaranteed to be closed (and flushed) before the datastore reads it.
bowtie_blocks_to_csv(fname,fname_sum,block_size);

ds = tabularTextDatastore(fname_sum, ...
    'Delimiter',',', ...
    'ReadVariableNames',true, ...
    'ReadSize',20000);

alignment=tall(ds);

end

function bowtie_blocks_to_csv(fname,fname_sum,block_size)
% Stream fname block by block and append the extracted fields to fname_sum.

fid=fopen(fname);
if fid<0
    error('myread_bowtie_block_tall:cannotOpenInput', ...
        'Could not open input file "%s".',fname);
end
close_input=onCleanup(@()fclose(fid)); %#ok<NASGU>

fid_sum=fopen(fname_sum,'w');
if fid_sum<0
    error('myread_bowtie_block_tall:cannotOpenOutput', ...
        'Could not open output file "%s" for writing.',fname_sum);
end
close_summary=onCleanup(@()fclose(fid_sum)); %#ok<NASGU>

fprintf(fid_sum,'QueryName,Flag,ReferenceName,NM\n'); % print header row

% Number of blocks is estimated from the file size in bytes.
fseek(fid,0,'eof');
file_bytes=ftell(fid);
batch_num=ceil(file_bytes./block_size);
fseek(fid,0,'bof');

f=waitbar(0,sprintf('Converting bowtie results in %u blocks ...',batch_num));
close_bar=onCleanup(@()close_waitbar_if_open(f)); %#ok<NASGU>
drawnow;

carry='';      % text after the last newline of the previous block(s)
err_count=1;   % index used in the names of the diagnostic dump files
for n=1:batch_num
    if rem(n,10)==0
        fprintf('%u\n',n);
    end

    chunk=fread(fid,block_size,'*char')';
    last_idx=find(chunk==newline,1,'last');

    if isempty(last_idx)
        % No line terminator in this block: everything read so far still
        % belongs to one unfinished line, so keep accumulating it.
        carry=[carry,chunk]; %#ok<AGROW>
    else
        prev_carry=carry;
        % The leading newline lets the line-start lookbehind match the
        % first line of the block as well.
        block_text=[newline,carry,chunk(1:last_idx)];
        carry=chunk(last_idx+1:end);
        err_count=write_bowtie_block(fid_sum,block_text,prev_carry,n,err_count);
    end

    if ishghandle(f)
        waitbar(n/batch_num,f);
    end
    drawnow;
end

% A file that does not end with a newline leaves its last record in the
% carry buffer; flush it as a final, newline-terminated block.
if ~isempty(carry)
    write_bowtie_block(fid_sum,[newline,carry,newline],carry,batch_num,err_count);
end

end

function err_count=write_bowtie_block(fid_sum,block_text,prev_carry,block_idx,err_count)
% Extract the fields from one newline-delimited block and append them to the CSV.
% Returns the (possibly incremented) diagnostic dump counter.

% First three tab-separated word fields of every line.
lead=regexp(block_text,'(?<=\n)[\w]*\t[\w]*\t[\w]*','match');
parts=regexp(lead,'\t','split')';
q=vertcat(parts{:});

% Value of every NM:i: tag in the block.
nm_txt=regexp(block_text,'(?<=NM:i:)\w*','match')';
nm=uint8(double(string(nm_txt)));

if size(q,1)~=numel(nm)
    warning('split outputs did not match in block %u',block_idx);
    dump_text_file(sprintf('errorblock%u.txt',err_count),block_text);
    dump_text_file(sprintf('error_remainder%u.txt',err_count),prev_carry);
    err_count=err_count+1;
    % Rows and NM values cannot be paired one-to-one, so skip this block
    % instead of writing misaligned rows or failing inside compose.
    return
end

if ~isempty(q)
    fprintf(fid_sum,'%s', ...
        string(compose('%s,%s,%s,%u\n', ...
            string(q(:,1)), ...
            string(q(:,2)), ...
            string(q(:,3)), ...
            nm)));
end

end

function dump_text_file(dump_name,txt)
% Write txt verbatim to dump_name (used for diagnostics only).

fid_err=fopen(dump_name,'w');
if fid_err<0
    warning('could not open %s for writing',dump_name);
    return
end
% '%s' keeps '%' and '\' in the data from being interpreted as format
% directives.
fprintf(fid_err,'%s',txt);
fclose(fid_err);

end

function close_waitbar_if_open(h)
% Close the progress bar unless the user already closed it.

if ishghandle(h)
    close(h);
end

end

