#! /usr/bin/perl
# perl chrstrloc.pl file.to.be.reformatted
# Remember: column numbering starts from 0.
# perl ./shrimpENSEMBL2genominator.pl s1cs20.mapped
# >95_726_1000_F3	ENSMUSG00000065411 (gene name)|ENSMUST00000083477 (transcript name)|11 (chr)|70048544 (start)|70048637 (end)|1(strand)	-	21	42	4	25	25	220	22	
use strict;
use warnings;

# Reformat a SHRiMP mapping file (reads aligned against ENSEMBL transcripts)
# into two tab-separated files written next to the input:
#   <input>.bed    chromosome code, strand (1 / -1), start taken from the
#                  transcript description
#   <input>.logos  information used to evaluate the quality of the results:
#                  ENSEMBL gene name, alignment description, start of the
#                  alignment in the reference sequence, start in the read
# Only lines beginning with '>' are treated as alignment records.

# Translate a chromosome name into the code written to the .bed file.
# MT, X and Y become 77, 88 and 89 (the ASCII codes of 'M', 'X' and 'Y', so
# pack("C*", $code) gives the letter back); names starting with a digit are
# kept unchanged. Returns nothing for a missing or unrecognised name.
sub chromosome_code {
    my ($name) = @_;
    return unless defined $name;
    return '77'  if $name =~ /^MT/;
    return '88'  if $name =~ /^X/;
    return '89'  if $name =~ /^Y/;
    return $name if $name =~ /^[0-9]/;
    return;
}

# Translate the strand column into 1 (contains '+') or -1 (contains '-').
# Any other value is passed through unchanged.
sub strand_code {
    my ($strand) = @_;
    return ''   unless defined $strand;
    return '1'  if $strand =~ /\+/;
    return '-1' if $strand =~ /-/;
    return $strand;
}

my ($mappingFileName) = @ARGV;
die "Usage: $0 mapping_file\n"
    unless defined $mappingFileName && length $mappingFileName;

# Three-argument open with lexical handles: the file name is never
# interpreted as part of the open mode.
open my $in_fh, '<', $mappingFileName
    or die "Cannot read $mappingFileName: $!";
open my $bed_fh, '>', "$mappingFileName.bed"
    or die "Cannot write $mappingFileName.bed: $!";
open my $logos_fh, '>', "$mappingFileName.logos"
    or die "Cannot write $mappingFileName.logos: $!";

while (my $line = <$in_fh>) {
    next unless $line =~ /^>/;

    # Strip the line terminator first; otherwise it stays attached to the
    # last column and ends up in the middle of the .logos row.
    $line =~ s/\r?\n\z//;

    my @fields = split /\t/, $line;

    # Column 1 is the '|'-separated transcript description:
    # gene | transcript | chromosome | start | end | strand
    my @target = split /\|/, (defined $fields[1] ? $fields[1] : '');

    # A record whose chromosome cannot be translated is skipped instead of
    # silently inheriting the chromosome of the previous record.
    my $chr = chromosome_code($target[2]);
    if (!defined $chr) {
        my $shown = defined $target[2] ? $target[2] : '';
        warn "$mappingFileName line $.: unrecognised chromosome '$shown', record skipped\n";
        next;
    }

    # chr, strand (1, -1), start alignment
    my @bed_row = ($chr, strand_code($fields[2]), $target[3]);

    # gene name, aligning info, start in gene, start in read
    my @logos_row = ($target[0], @fields[9, 3, 5]);

    # Columns missing from a short line are written as empty strings.
    print {$bed_fh}   join("\t", map { defined $_ ? $_ : '' } @bed_row),   "\n";
    print {$logos_fh} join("\t", map { defined $_ ? $_ : '' } @logos_row), "\n";
}

close $in_fh;
# Buffered write errors only surface at close, so check both output handles.
close $bed_fh   or die "Cannot close $mappingFileName.bed: $!";
close $logos_fh or die "Cannot close $mappingFileName.logos: $!";
