=head1 LICENSE

Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
Copyright [2016-2022] EMBL-European Bioinformatics Institute
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
     http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

=head1 CONTACT

    Ensembl <http://www.ensembl.org/info/about/contact/index.html>

=cut

=head1 NAME

    UTRAnnotator

=head1 SYNOPSIS
 
    mv UTRAnnotator.pm ~/.vep/Plugins
    vep -i variations.vcf --tab --plugin UTRAnnotator,file=/path/to/uORF_starts_ends_GRCh37_PUBLIC.txt

=head1 DESCRIPTION
 
    A VEP plugin that annotates the effect of 5' UTR variant especially for variant creating/disrupting upstream ORFs.
    Available for both GRCh37 and GRCh38.

    Options are passed to the plugin as key=value pairs, (defaults in parentheses):

    file                : Path to UTRAnnotator data file.
                        - Download from https://github.com/Ensembl/UTRannotator
                        - Download from http://www.sorfs.org/

    Citation

    About the role of 5'UTR variants in human genetic disease:

    Whiffin, N., Karczewski, K.J., Zhang, X. et al. Characterising the loss-of-function impact of 5’ untranslated region variants in 15,708 individuals. Nat Commun 11, 2523 (2020). https://doi.org/10.1038/s41467-019-10717-9

    About UTRAnnotator:

    Annotating high-impact 5'untranslated region variants with the UTRannotator Zhang, X., Wakeling, M.N., Ware, J.S, Whiffin, N. Bioinformatics; doi: https://academic.oup.com/bioinformatics/advance-article/doi/10.1093/bioinformatics/btaa783/5905476

=cut


package UTRAnnotator;

use base qw(Bio::EnsEMBL::Variation::Utils::BaseVepPlugin);
use Bio::EnsEMBL::Utils::Sequence qw(reverse_comp);

use Scalar::Util qw(looks_like_number);

use strict;

sub feature_types {
  return ['Transcript'];
}

sub new {
  my $class = shift;
  my $self = $class->SUPER::new(@_);

  printf "\nWarning: no FASTA file specified (--fasta). VEP running will take a long time." if ($self->config->{fasta} eq "");

  my $param_hash = $self->params_to_hash();

  if (-e $param_hash->{file}){
    open my $fh, "<", $param_hash->{file} or die $!;
    my %uORF_evidence;

    while (<$fh>) {
      chomp;
      my ($chr, $pos, $gene, $strand, $type, $stop_pos) = split /\t/;
      next if $chr eq "chr" && $pos eq "pos";

      die "$chr does not exist; Check your reference file." unless ($chr =~ m/^(chr*)/);
      die "Position $pos is not a number; Check your reference file." unless looks_like_number($pos);

      my $key = $chr . ":" . $pos; # chr has 'chr' prefix
      $uORF_evidence{$key} = 1;
    }

    close $fh;

  $self->{uORF_evidence} = \%uORF_evidence;

  } else {
      printf "Warning: small ORF file not found. For human, you could use the curated list of uORFs found in the repository (https://github.com/Ensembl/UTRannotator):\n" .
      "'uORF_starts_ends_GRCh37_PUBLIC.txt' for GRCh37 or 'uORF_starts_ends_GRCh38_PUBLIC.txt' for GRCh38.\n";
  }

  # Kozak-Strength Class
  my %kozak_strength;
  $kozak_strength{1}='Weak';
  $kozak_strength{2}='Moderate';
  $kozak_strength{3}='Strong';

  $self->{kozak_strength} = \%kozak_strength;

  return $self;
}

sub get_header_info {

  my $self->{_header_info} = {
        UTRAnnotator_five_prime_UTR_variant_consequence => "'Output the variant consequences of a given 5 prime UTR variant: 5_prime_UTR_premature_start_codon_gain_variant, " . 
        "5_prime_UTR_stop_codon_gain_variant, 5_prime_UTR_premature_start_codon_loss_variant, 5_prime_UTR_stop_codon_loss_variant or 5_prime_UTR_uORF_frameshift_variant'",
        UTRAnnotator_five_prime_UTR_variant_annotation => "'Output the annotation of a given 5 prime UTR variant'",
        UTRAnnotator_existing_uORFs => "'The number of existing uORFs with a stop codon within the 5 prime UTR'",
        UTRAnnotator_existing_OutOfFrame_oORFs => "'The number of existing out-of-frame overlapping ORFs (OutOfFrame oORF) at the 5 prime UTR'",
        UTRAnnotator_existing_InFrame_oORFs => "'The number of existing inFrame overlapping ORFs (inFrame oORF) at the 5 prime UTR'",
  };
  return $self->{_header_info};
}

sub run {
  my ($self, $tva) = @_;

  #only annotate the effect if the variant is 5_prime_UTR_variant
  return {} unless grep {$_->SO_term eq '5_prime_UTR_variant'}  @{$tva->get_all_OverlapConsequences};

  #retrieve the variant info
  my $vf = $tva->variation_feature;
  my $chr = ($vf->{chr}||$vf->seq_region_name);
  my $pos = ($vf->{start}||$vf->seq_region_start);
  my @alleles = split /\//, $vf->allele_string;
  my $ref = shift @alleles;
  my $alt = $tva->variation_feature_seq;
  my %variant = (
        "chr" => $chr,
        "pos" => $pos,
        "ref" => $ref,
        "alt" => $alt,
  );

  #retrieve the UTR info: transcript id, strand, five prime UTR sequence, start and end genomic coordinates.
  my $t = $tva->transcript;
  my $transcript_id = (defined $t? $t->stable_id: undef);

  #retrieve the gene symbol of the transcript
  my $symbol = $t->{_gene_symbol} || $t->{_gene_hgnc};

  #retrieve the strand of the transcript
  my $tr_strand = $t->strand + 0;
  my $cds = $t->translateable_seq();

  #retrieve the five prime utr sequence
  my $five_prime_seq = (defined $t->five_prime_utr? $t->five_prime_utr->seq(): undef);

  #Type: five_prime_feature - Bio::EnsEMBL::Feature
  my $UTRs = $t->get_all_five_prime_UTRs();
  my @five_utr_starts;
  my @five_utr_ends;
  foreach  my $utr (@$UTRs){
    my $start = $utr->start(); # this will return the absolute starting positions in chromosome of the UTR exons
    my $end = $utr->end(); # this will return the absolute ending positions in chromosome of the UTR exons
    push(@five_utr_starts, $start);
    push(@five_utr_ends,$end);
  }

  my @sorted_starts = sort {$a <=> $b} @five_utr_starts;
  my @sorted_ends = sort {$a <=> $b} @five_utr_ends;

  my %UTR_info = (
        "gene" => $symbol,
        "start" => \@sorted_starts,
        "end" => \@sorted_ends,
        "seq" => $five_prime_seq,   #the exon sequences in 5'UTR
        "strand" => $tr_strand,
        "cds_seq" => $cds,
  );

  $self->{ref_coding} = $self->get_ref_coding($ref);
  $self->{alt_coding} = $self->get_alt_coding($alt,$tr_strand);
  
  my %uAUG_gained = {};
  my %uSTOP_lost = {};
  my %uAUG_lost = {};
  my %uSTOP_gained = {};
  my %uFrameshift = {};
  my ($mut_pos, $end_pos) = $self->get_allele_exon_pos($tr_strand, $pos, $self->{ref_coding}, \%UTR_info);

  if(defined($mut_pos) && defined($end_pos)) {

    my @sequence = split //, $five_prime_seq;
    my $mut_utr_seq = $self->mut_utr_sequence(
        \@sequence,$mut_pos,
        $self->{ref_coding},
        $self->{alt_coding},
        $tr_strand
    );

    my @utr_sequence = split //, $UTR_info{seq};
    my %existing_uORF = %{$self->existing_uORF(\@utr_sequence)};

    my @mut_utr_seq = split //,$mut_utr_seq;
    my %existing_utr_uORF = %{$self->existing_uORF(\@mut_utr_seq)};

    my @start = @{$self->get_ATG_pos(\@utr_sequence)};

    %uAUG_gained = %{$self->uAUG_gained(\%variant,\%UTR_info, $mut_pos, $mut_utr_seq, \%existing_utr_uORF)};
    %uSTOP_lost = %{$self->uSTOP_lost(\%variant,\%UTR_info, $mut_pos, $mut_utr_seq, \%existing_uORF, \%existing_utr_uORF)};
    %uAUG_lost = %{$self->uAUG_lost(\%variant,\%UTR_info, $mut_pos, $mut_utr_seq, \%existing_uORF, \@start)};
    %uSTOP_gained = %{$self->uSTOP_gained(\%variant,\%UTR_info, $mut_pos, $end_pos, $mut_utr_seq, \%existing_uORF, \%existing_utr_uORF, \@start)};
    %uFrameshift = %{$self->uFrameshift(\%variant,\%UTR_info, $mut_pos, $end_pos, $mut_utr_seq, \%existing_uORF, \%existing_utr_uORF, \@start)};

  }

  my %five_prime_flag = (
        "uAUG_gained" => $uAUG_gained{'uAUG_gained_flag'},
        "uSTOP_lost" => $uSTOP_lost{'uSTOP_lost_flag'},
        "uAUG_lost" => $uAUG_lost{'uAUG_lost_flag'},
        "uSTOP_gained" => $uSTOP_gained{'uSTOP_gained_flag'},
        "uFrameshift" =>$uFrameshift{'uFrameShift_flag'},
  );

  my %five_prime_annotation = (
        "uAUG_gained" => $uAUG_gained{"uAUG_gained_effect"},
        "uSTOP_lost" => $uSTOP_lost{"uSTOP_lost_effect"},
        "uAUG_lost" => $uAUG_lost{"uAUG_lost_effect"},
        "uSTOP_gained" => $uSTOP_gained{'uSTOP_gained_effect'},
        "uFrameshift" =>$uFrameshift{'uFrameShift_effect'},
  );


  my @output_five_prime_flag=();
  my @output_five_prime_annotation=();

  foreach my $flag (keys %five_prime_flag){
    if($five_prime_flag{$flag}){
      push @output_five_prime_flag,$five_prime_flag{$flag};
      push @output_five_prime_annotation,$five_prime_annotation{$flag};
    }
  };

  @output_five_prime_flag=("-") if(!@output_five_prime_flag);
  @output_five_prime_annotation=("-") if(!@output_five_prime_annotation);

  my %utr_effect = (
        "UTRAnnotator_five_prime_UTR_variant_consequence" => (join "&", @output_five_prime_flag),
        "UTRAnnotator_five_prime_UTR_variant_annotation" => (join "&", @output_five_prime_annotation),
  );

  my $existing_uORF_num = $self->count_number_ATG($five_prime_seq);
  my $output ={%utr_effect, %$existing_uORF_num};
  return $output? $output: {};
}

##################
# Main functions #
##################


sub uAUG_gained {
  # Description: annotate if a five_prime_UTR_variant creates ATG

  my ($self, $variant_info,$UTR_info, $mut_pos, $mut_utr_seq, $existing_utr_uorf) = @_;

  my $pos = $variant_info->{pos};
  my $ref = $variant_info->{ref};
  my $alt = $variant_info->{alt};

  my @sequence = split //, $UTR_info->{seq};
  my $strand = $UTR_info->{strand};

  #return annotators
  my $uAUG_gained_DistanceToCDS = "";  # the distance between the gained uAUG to CDS
  my $uAUG_gained_KozakContext = "";  # the Kozak context sequence of the gained uAUG
  my $uAUG_gained_KozakStrength = ""; # the Kozak strength of the gained uAUG
  my $uAUG_gained_type = ""; # the type of uORF created - any of the following: uORF, inframe_oORF,OutOfFrame_oORF
  my $uAUG_gained_DistanceFromCap = ""; # the distance between the gained uAUG to the start of the five prime UTR
  my $uAUG_gained_DistanceToStop = ""; #the distance between the gained uAUG to stop codon (could be in CDS)

  #indicate whether the variant creates a ATG
  my $flag = 0;
  my $current_kozak = "";
  my $current_kozak_strength ="";

  #the relative position of input variant in the UTR sequence

  my %result = (); #result is a hash table with two elements: $flag and $output_effects
  my $output_flag = "";
  my $output_effects = "";

  my $ref_coding = $self->{ref_coding};
  my $alt_coding = $self->{ref_coding};

  my @mut_utr_seq = split //,$mut_utr_seq;
  my $mut_utr_length = @mut_utr_seq;

  #get the nt sequence that might have the pos_A for a new ATG codon
  my @mut_seq = @mut_utr_seq[$mut_pos-2..$mut_pos+length($alt_coding)+1];
  my @mut_atg_pos = @{$self->get_ATG_pos(\@mut_seq)};

  # check whether there is a ATG codon is to check the length of the array
  #if there is a ATG codon, flag it and output the relative positive of A
  my $pos_A;
  if (@mut_atg_pos){
    #get the pos of A (in ATG) in the UTR sequence
    $pos_A=$mut_pos-2+$mut_atg_pos[0];
    $flag=1;
  }

  if ($flag){

    ################################################################################
    #annotator 1: get the distance to the start codon of the main ORF
    ################################################################################
    $uAUG_gained_DistanceToCDS = $mut_utr_length-$pos_A;
    $uAUG_gained_DistanceFromCap = $pos_A;

    ################################################################################
    #annotator 2: determine kozak context;
    ################################################################################
    if ((($pos_A-3)>=0)&&($mut_utr_seq[($pos_A+3)])){
      $current_kozak = $mut_utr_seq[($pos_A-3)].$mut_utr_seq[($pos_A-2)].$mut_utr_seq[$pos_A-1]."ATG".$mut_utr_seq[$pos_A+3];
    }
    else{
      $current_kozak = '-';
    }
    #get the strength of kozak context
    if ($current_kozak !~ /-/){
      my @split_kozak = split //, $current_kozak;
      $current_kozak_strength = 1;
      if ((($split_kozak[0] eq 'A')||($split_kozak[0] eq 'G'))&&($split_kozak[6] eq 'G')){
        $current_kozak_strength = 3;
      }
      elsif ((($split_kozak[0] eq 'A')||($split_kozak[0] eq 'G'))||($split_kozak[6] eq 'G')){
        $current_kozak_strength = 2;
      }
    }

    $uAUG_gained_KozakContext=$current_kozak;
    $uAUG_gained_KozakStrength=$self->{kozak_strength}{$current_kozak_strength}? $self->{kozak_strength}{$current_kozak_strength}:$current_kozak_strength;


    ################################################################################
    #annotator 3: Type of new ORF with respect the main ORF: uORF/Overlapping_inFrame/Overlapping_outofframe
    ################################################################################

    if(exists($existing_utr_uorf->{$pos_A})){ #if there is stop codon within 5'UTR
      $uAUG_gained_type = "uORF";
    }
    else{
      if(($mut_utr_length - $pos_A) % 3){
        $uAUG_gained_type = "OutOfFrame_oORF";
      }
      else{
        $uAUG_gained_type = "inFrame_oORF";
      }
    }

    my @overlapping_seq = split //, $mut_utr_seq.$UTR_info->{cds_seq};
    my %existing_uORF = %{$self->existing_uORF(\@overlapping_seq)};
    if(exists($existing_uORF{$pos_A})){
      my @stop_pos_array = sort{$a<=>$b}@{$existing_uORF{$pos_A}};
      my $stop_pos = $stop_pos_array[0];
      $uAUG_gained_DistanceToStop = $stop_pos-$pos_A;
    } else {
      $uAUG_gained_DistanceToStop = "NA";
    }


    my %uORF_effect = (
            "uAUG_gained_KozakContext" => $uAUG_gained_KozakContext,
            "uAUG_gained_KozakStrength" => $uAUG_gained_KozakStrength,
            "uAUG_gained_DistanceToCDS" => $uAUG_gained_DistanceToCDS,
            "uAUG_gained_type" => $uAUG_gained_type,
            "uAUG_gained_DistanceToStop" => $uAUG_gained_DistanceToStop,
            "uAUG_gained_CapDistanceToStart" => $uAUG_gained_DistanceFromCap,
    );

    $output_flag = "5_prime_UTR_premature_start_codon_gain_variant";
    $output_effects = $self->transform_hash_to_string(\%uORF_effect);
  }

  $result{'uAUG_gained_flag'} = $output_flag;
  $result{'uAUG_gained_effect'} = $output_effects;

  return \%result;
}

sub uSTOP_gained {
  # Description: annotate whether a five_prime_UTR_variant creates new stop codon. It only evaluate SNVs.

  my ($self, $variant_info,$UTR_info, $mut_pos, $end_pos, $mut_utr_seq, $existing_ref_uORF, $mut_uORF, $start) = @_;

  my $chr = $variant_info->{chr};
  my $pos = $variant_info->{pos};
  my $ref = $variant_info->{ref};
  my $alt = $variant_info->{alt};

  my @sequence = split //, $UTR_info->{seq};
  my $strand = $UTR_info->{strand};
  my $utr_length = @sequence;

  #return annotators
  my $uSTOP_gained_ref_StartDistanceToCDS = "";  # the distance between the uAUG of the disrupting uORF to CDS
  my $uSTOP_gained_KozakContext = "";  # the Kozak context sequence of the disrupting uORF
  my $uSTOP_gained_KozakStrength = ""; # the Kozak strength of the the disrupting uORF
  my $uSTOP_gained_ref_type = ""; # the type of uORF being disrupted - any of the following: uORF, inframe_oORF,OutOfFrame_oORF
  my $uSTOP_gained_newSTOPDistanceToCDS = ""; # the distance between the gained uSTOP to the start of the CDS
  my $uSTOP_gained_evidence = ""; # the translation evidence of the disrupted uORF

  #indicate whether the variant creates a stop codon
  my $flag = 0;
  my $current_kozak = "";
  my $current_kozak_strength ="";

  #the relative position of input variant in the UTR sequence

  my %result = (); #result is a hash table with two elements: $flag and $output_effects
  my $output_flag = 0;
  my $output_effects = "";

  my $ref_coding = $self->{ref_coding};
  my $alt_coding = $self->{alt_coding};

  #only evaluate SNVs and MNVs, for deletion and insertion it would be evaluated in the uframeshift
  return{} unless(length($ref_coding) eq length($alt_coding));
  # the length of the 5'UTR won't change so as the coordinates of the nts

  my @mut_utr_seq = split //,$mut_utr_seq;
  my $mut_utr_length = @mut_utr_seq;

  my @start = @{$start};

  if(@start){
    for (my $i=0;$i<@start;$i++){
      $flag=0;
      my $start_pos = $start[$i];

      # to set the range for searching new STOP codon: not including start_codon, (1) for uORFs - start_codon...stop_codon (2) for oORFs - start_codon...end of 5'UTR

      my $check_point = $start_pos;
      # the checking end point of eligible area
      #For uORF: start_pos .. check_point
      #For overlapping ORF: start_pos .. 3' end of 5'UTR sequence
      #if the variant is entirely in the uORF (within 5'UTR
      if(exists($existing_ref_uORF->{$start_pos})) {
        my @stops = sort {$a <=> $b} @{$existing_ref_uORF->{$start_pos}};
        $check_point = $stops[0]-1;
      } else{
        #if the existing uORF is an oORF
        $check_point = $utr_length-1;
      }

      # only check the ones between start and stop codons
      # ignore the cases at the boundary of start and stop codon since the effect on start/stop would be evaluated anyway
      next if(($mut_pos<$start_pos+3)|($end_pos>$check_point));
      #check whether there are new stop codon induced by this mutation
      my @mut_stops;
      next unless(exists($mut_uORF->{$start_pos}));
      @mut_stops = sort {$a <=> $b} @{$mut_uORF->{$start_pos}};

      my $mut_stop = $mut_stops[0];
      if($mut_stop<$check_point){
        $flag=1;
      }

      if($flag){
        #getting the Kozak context and Kozak strength of the start codon
        if ((($start_pos-3)>=0)&&($sequence[($start_pos+3)])){
          $current_kozak = $sequence[($start_pos-3)].$sequence[($start_pos-2)].$sequence[$start_pos-1]."ATG".$sequence[$start_pos+3];
        }
        else{
          $current_kozak = '-';
        }

        if ($current_kozak !~ /-/){
          my @split_kozak = split //, $current_kozak;
          $current_kozak_strength = 1;
          if ((($split_kozak[0] eq 'A')||($split_kozak[0] eq 'G'))&&($split_kozak[6] eq 'G')){
            $current_kozak_strength = 3;
          }
          elsif ((($split_kozak[0] eq 'A')||($split_kozak[0] eq 'G'))||($split_kozak[6] eq 'G')){
            $current_kozak_strength = 2;
          }
        }

        $uSTOP_gained_KozakContext=$current_kozak;
        $uSTOP_gained_KozakStrength=$self->{kozak_strength}{$current_kozak_strength}? $self->{kozak_strength}{$current_kozak_strength}:$current_kozak_strength;

        #the annotation of the original uORF
        if (exists($existing_ref_uORF->{$start_pos})){ #if there is stop codon within 5'UTR
          $uSTOP_gained_ref_type = "uORF"
        } elsif (($utr_length-$start_pos) % 3){
          $uSTOP_gained_ref_type = "OutOfFrame_oORF";
        } else {
          $uSTOP_gained_ref_type = "InFrame_oORF";
        }

        $uSTOP_gained_ref_StartDistanceToCDS = $utr_length - $start_pos;
        $uSTOP_gained_newSTOPDistanceToCDS = $mut_utr_length - $mut_stop;

        #find evidence in added reference file
        $uSTOP_gained_evidence = (exists $self->{uORF_evidence})? $self->find_uorf_evidence($UTR_info,$chr,$start_pos): "NA";

        my %uORF_effect = (
                "uSTOP_gained_ref_type" => $uSTOP_gained_ref_type,
                "uSTOP_gained_ref_StartDistanceToCDS" => $uSTOP_gained_ref_StartDistanceToCDS,
                "uSTOP_gained_newSTOPDistanceToCDS" => $uSTOP_gained_newSTOPDistanceToCDS,
                "uSTOP_gained_KozakContext" => $uSTOP_gained_KozakContext,
                "uSTOP_gained_KozakStrength" => $uSTOP_gained_KozakStrength,
                "uSTOP_gained_Evidence" => $uSTOP_gained_evidence,
              );

        $output_flag = $output_flag? $output_flag."&"."5_prime_UTR_stop_codon_gain_variant":"5_prime_UTR_stop_codon_gain_variant";
        $output_effects = $output_effects? $output_effects."&".$self->transform_hash_to_string(\%uORF_effect):$self->transform_hash_to_string(\%uORF_effect);
      }

    }
  }

  $result{'uSTOP_gained_flag'} = $output_flag;
  $result{'uSTOP_gained_effect'} = $output_effects;
  return \%result;

}

sub uSTOP_lost {

  # Description: annotate if a five_prime_UTR_varint removes a stop codon of an existing uORF (given that uORF does not not change)

  my ($self, $variant_info, $UTR_info, $mut_pos, $mut_utr_seq, $existing_uORF, $mut_uORF) = @_;

  my $chr = $variant_info->{chr};
  my $pos = $variant_info->{pos};
  my $ref = $variant_info->{ref};
  my $alt = $variant_info->{alt};

  my @sequence = split //, $UTR_info->{seq};
  my $strand = $UTR_info->{strand};

  #return annotators
  my $uSTOP_lost_AltStop = "";  #whether there is an alternative stop codon downstream
  my $uSTOP_lost_AltStopDistanceToCDS = ""; # the distance between alternative stop codon and the CDS
  my $uSTOP_lost_FrameWithCDS = ""; # the frame of the uORF with respect to CDS
  my $uSTOP_lost_KozakContext = ""; # the Kozak context sequence of the uORF with the lost stop codon
  my $uSTOP_lost_KozakStrength = ""; # the Kozak strength of the uORF with the lost stop codon
  my $uSTOP_lost_evidence = ""; # whether this uORF has any evidence of being translated
  ###

  my $current_kozak = "";
  my $current_kozak_strength = "";

  #indicate whether the variant ever disrupts a stop codon
  my $output_flag = "";
  #indicate whether the variant ever disrupts the uORF that being analyzed
  my $flag_uORF=0;

  #the relative position of input variant in the UTR sequence
  my %result = (); #result is a hash table with two elements: $flag and $output_effects
  my $output_effects="";

  my @stop_codons = ("TAA","TGA","TAG");

  my $ref_coding = $self->{ref_coding};
  my $alt_coding = $self->{alt_coding};

  my @mut_utr_seq = split //,$mut_utr_seq;
  my $length = @mut_utr_seq;

  my @start = sort {$a <=> $b} (keys %{$existing_uORF});

  if(%{$existing_uORF}){
    for (my $i=0;$i<@start;$i++){
      $flag_uORF=0;
      my $start_pos = $start[$i];
      my @stops = sort {$a <=> $b} @{$existing_uORF->{$start_pos}};
      my $stop_pos=$stops[0];

      next if ($mut_pos-$stop_pos>2);

      #for snps and deletion
      next if (defined($ref_coding) & $mut_pos+length($ref_coding)-1<$stop_pos);

      next if ($mut_pos<$stop_pos & !defined($ref_coding));   #for insertion

      #for deletion, it definitely disrupting the stop codon.
      if (length($alt_coding) eq 0) {
        $flag_uORF=1;
      } else {
        my $mut_codon = $mut_utr_seq[$stop_pos].$mut_utr_seq[$stop_pos+1].$mut_utr_seq[$stop_pos+2];

        next if (grep( /^$mut_codon$/, @stop_codons));
        $flag_uORF=1;
      }

      if($flag_uORF){
        #getting the Kozak context and Kozak strength of the start codon

        if ((($start_pos-3)>=0)&&($sequence[($start_pos+3)])){
          $current_kozak = $sequence[($start_pos-3)].$sequence[($start_pos-2)].$sequence[$start_pos-1]."ATG".$sequence[$start_pos+3];
        }
        else{
          $current_kozak = '-';
        }

        if ($current_kozak !~ /-/){
          my @split_kozak = split //, $current_kozak;
          $current_kozak_strength = 1;
          if ((($split_kozak[0] eq 'A')||($split_kozak[0] eq 'G'))&&($split_kozak[6] eq 'G')){
            $current_kozak_strength = 3;
          }
          elsif ((($split_kozak[0] eq 'A')||($split_kozak[0] eq 'G'))||($split_kozak[6] eq 'G')){
            $current_kozak_strength = 2;
          }
        }

        $uSTOP_lost_KozakContext=$current_kozak;
        $uSTOP_lost_KozakStrength=$self->{kozak_strength}{$current_kozak_strength}? $self->{kozak_strength}{$current_kozak_strength}:$current_kozak_strength;

        # the sequence before mut_pos should not be changed. Thus start_pos shall still correspond to a start codon;
        # if the sequence is indeed very short as such ATGTGA

        my @mut_stops;
        if(exists($mut_uORF->{$start_pos})){
          @mut_stops = sort {$a <=> $b} @{$mut_uORF->{$start_pos}}
        }

        if (@mut_stops>0){
          $uSTOP_lost_AltStop = "True";
          $uSTOP_lost_AltStopDistanceToCDS = $length-$mut_stops[0];
        } #if there is no alternative stop codon
        else{
          $uSTOP_lost_AltStop = "False";
          $uSTOP_lost_AltStopDistanceToCDS = "NA";
        }
        if (($length-$start_pos) % 3){
          $uSTOP_lost_FrameWithCDS = "outOfFrame";
        }
        else {
          $uSTOP_lost_FrameWithCDS = "inFrame";
        }
        #find evidence from sorf

        $uSTOP_lost_evidence= (exists $self->{uORF_evidence})? $self->find_uorf_evidence($UTR_info,$chr,$start_pos): "NA";

        my %uORF_effect = (
          "uSTOP_lost_AltStop" => $uSTOP_lost_AltStop,
          "uSTOP_lost_AltStopDistanceToCDS" => $uSTOP_lost_AltStopDistanceToCDS,
          "uSTOP_lost_FrameWithCDS" => $uSTOP_lost_FrameWithCDS,
          "uSTOP_lost_KozakContext" => $uSTOP_lost_KozakContext,
          "uSTOP_lost_KozakStrength" => $uSTOP_lost_KozakStrength,
          "uSTOP_lost_Evidence" => $uSTOP_lost_evidence,
        );

        $output_flag = $output_flag? $output_flag."&"."5_prime_UTR_stop_codon_loss_variant":"5_prime_UTR_stop_codon_loss_variant";
        $output_effects = $output_effects? $output_effects."&".$self->transform_hash_to_string(\%uORF_effect):$self->transform_hash_to_string(\%uORF_effect);
      }
    }
  }

  $result{'uSTOP_lost_flag'} = $output_flag;
  $result{'uSTOP_lost_effect'} = $output_effects;

  return \%result;
}

sub uAUG_lost {
  # Description: annotate if a five_prime_UTR_varint removes a start codon of an existing uORF

  my ($self, $variant_info, $UTR_info, $mut_pos, $mut_utr_seq, $existing_ref_uORF, $start) = @_;

  my $chr = $variant_info->{chr};
  my $pos = $variant_info->{pos};
  my $ref = $variant_info->{ref};
  my $alt = $variant_info->{alt};

  my @sorted_starts = @{$UTR_info->{start}};
  my @sequence = split //, $UTR_info->{seq};
  my $utr_length = @sequence;
  my $strand = $UTR_info->{strand};

  #return annotators
  my $uAUG_lost_type = "";   # the uORF type with the reference allele - uORF, inframe_oORF, outOfFrame_oORF
  my $uAUG_lost_DistanceToCDS = ""; # the distance of the lost uAUG to CDS
  my $uAUG_lost_DistanceToSTOP = ""; # the distance of the lost uAUG to stop codon (could be in CDS)
  my $uAUG_lost_KozakContext = ""; # the Kozak context sequence of the lost uAUG
  my $uAUG_lost_KozakStrength = ""; # the strength of KozakContext of the lost uAUG
  my $uAUG_lost_evidence = ""; # whether this uAUG has any evidence of being translated

  my $current_kozak = "";
  my $current_kozak_strength = "";

  #indicate whether the variant ever disrupts a stop codon
  my $output_flag = "";
  #indicate whether the variant ever disrupts the uORF that being analyzed
  my $flag_uORF=0;

  #the relative position of input variant in the UTR sequence

  my %result = (); #result is a hash table with two elements: $flag and $output_effects
  my $output_effects="";

  my $ref_coding = $self->{ref_coding};
  my $alt_coding = $self->{alt_coding};

  my @mut_utr_seq = split //,$mut_utr_seq;

  my @start = @{$start};

  for (my $i=0;$i<@start;$i++){
    $flag_uORF=0;
    my $start_pos = $start[$i];

    next if ($mut_pos-$start_pos>2);

    if (length($ref_coding) ne 0){
      next if ($mut_pos+length($ref_coding)-1<$start_pos);
    }
    next if ($mut_pos<$start_pos);   #for insertion

    #for deletion, it definitely disrupting the uORF.

    if (length($alt_coding) eq 0) {
      $flag_uORF=1;
    } else {
      my $mut_codon = $mut_utr_seq[$start_pos].$mut_utr_seq[$start_pos+1].$mut_utr_seq[$start_pos+2];
      next if ($mut_codon eq "ATG");
      $flag_uORF=1;
    }

    if($flag_uORF){

      #getting the Kozak context and Kozak strength of the lost start codon
      if ((($start_pos-3)>=0)&&($sequence[($start_pos+3)])){
        $current_kozak = $sequence[($start_pos-3)].$sequence[($start_pos-2)].$sequence[$start_pos-1]."ATG".$sequence[$start_pos+3];
      } else {
        $current_kozak = '-';
      }

      if ($current_kozak !~ /-/){
        my @split_kozak = split //, $current_kozak;
        $current_kozak_strength = 1;
        if ((($split_kozak[0] eq 'A')||($split_kozak[0] eq 'G'))&&($split_kozak[6] eq 'G')){
          $current_kozak_strength = 3;
        } elsif ((($split_kozak[0] eq 'A')||($split_kozak[0] eq 'G'))||($split_kozak[6] eq 'G')){
          $current_kozak_strength = 2;
        }
      }
      $uAUG_lost_KozakContext=$current_kozak;
      $uAUG_lost_KozakStrength=$self->{kozak_strength}{$current_kozak_strength}? $self->{kozak_strength}{$current_kozak_strength}:$current_kozak_strength;

      #check what kind of uORF does that correspond to?
      #first check whether it's overlapping with CDS

      if (exists($existing_ref_uORF->{$start_pos})){ #if there is stop codon within 5'UTR
        $uAUG_lost_type = "uORF"
      } elsif(($utr_length-$start_pos) % 3){
        $uAUG_lost_type = "OutOfFrame_oORF";
      } else {
        $uAUG_lost_type = "InFrame_oORF";
      }

      $uAUG_lost_DistanceToCDS = $utr_length - $start_pos;

      my @overlapping_seq = split //, $UTR_info->{seq}.$UTR_info->{cds_seq};
      my %existing_overlapping_uORF = %{$self->existing_uORF(\@overlapping_seq)};

      if(exists($existing_overlapping_uORF{$start_pos})){
        my @stop_pos_array = sort{$a<=>$b}@{$existing_overlapping_uORF{$start_pos}};
        my $stop_pos = $stop_pos_array[0];
        $uAUG_lost_DistanceToSTOP = $stop_pos-$start_pos;
      } else {
        $uAUG_lost_DistanceToSTOP = "NA"
      }

      $uAUG_lost_evidence= (exists $self->{uORF_evidence})? $self->find_uorf_evidence($UTR_info,$chr,$start_pos): "NA";

      my %uORF_effect = (
          "uAUG_lost_type" => $uAUG_lost_type,
          "uAUG_lost_CapDistanceToStart" =>$start_pos,
          "uAUG_lost_DistanceToCDS" => $uAUG_lost_DistanceToCDS,
          "uAUG_lost_DistanceToStop" => $uAUG_lost_DistanceToSTOP,
          "uAUG_lost_KozakContext" => $uAUG_lost_KozakContext,
          "uAUG_lost_KozakStrength" => $uAUG_lost_KozakStrength,
          "uAUG_lost_Evidence" => $uAUG_lost_evidence,
        );

      $output_flag = $output_flag? $output_flag."&"."5_prime_UTR_premature_start_codon_loss_variant": "5_prime_UTR_premature_start_codon_loss_variant";
      $output_effects = $output_effects? $output_effects."&".$self->transform_hash_to_string(\%uORF_effect):$self->transform_hash_to_string(\%uORF_effect);
    }
  }

  $result{'uAUG_lost_flag'} = $output_flag;
  $result{'uAUG_lost_effect'} = $output_effects;

  return \%result;
}

sub uFrameshift {

  # Description: annotate if a five_prime_UTR_varint create a frameshift in existing uORFs

  my ($self, $variant_info, $UTR_info, $mut_pos, $end_pos, $mut_utr_seq, $existing_uORF, $mut_uORF, $start) = @_;

  my $chr = $variant_info->{chr};
  my $pos = $variant_info->{pos};
  my $ref = $variant_info->{ref};
  my $alt = $variant_info->{alt};

  my @sequence = split //, $UTR_info->{seq};
  my $strand = $UTR_info->{strand};
  my $utr_length = @sequence;

  #return annotators
  my $uFrameshift_ref_type = ""; # the type of uORF with the reference allele
  my $uFrameshift_ref_type_length = ""; # the length of the uORF with of the reference allele.
  my $uFrameshift_StartDistanceToCDS = ""; # the distance between the start codon of the disrupted uORF and CDS
  my $uFrameshift_alt_type = ""; # the type of uORF with the alternative allele
  my $uFrameshift_alt_type_length = ""; # the length of the uORF with of the alternative allele
  my $uFrameshift_KozakContext = ""; # the Kozak context sequence of the disrupted uORF
  my $uFrameshift_KozakStrength = ""; # the Kozak strength of the disrupted uORF
  my $uFrameshift_evidence = ""; # whehter there is translation evidence for the disrupted uORF

  #indicate whether the variant ever introduce a frameshift variant
  my $flag_uORF;
  my $output_flag = "";

  my $current_kozak="";
  my $current_kozak_strength="";

  my %result = (); #result is a hash table with two elements: $flag and $output_effects
  my $output_effects="";

  my $ref_coding = $self->{ref_coding};
  my $alt_coding = $self->{alt_coding};

  #skip alleles with same length
  return {} unless(length($ref_coding) ne length($alt_coding));

  #if it's a deletion at the boundary of exon and intron, we would skip the annotation

  my @mut_utr_seq = split //,$mut_utr_seq;
  my $length = @mut_utr_seq;

  my @start = @{$start};

  #check for each uORF
  if(@start){
    for (my $i=0;$i<@start;$i++){
      $flag_uORF=0;
      my $start_pos = $start[$i];

      my $check_point = $start_pos;
      # the checking end point of eligible area
      #For uORF: start_pos .. check_point
      #For overlapping ORF: start_pos .. 3' end of 5'UTR sequence

      #if the variant is entirely in the uORF (within 5'UTR)

      if(exists($existing_uORF->{$start_pos})) {
        my @stops = sort {$a <=> $b} @{$existing_uORF->{$start_pos}};
        $check_point = $stops[0]-1;
      } else {
        #if the existing uORF is an oORF
        $check_point = $utr_length-1;
      }

      # only check the ones between start and stop codons and
      # ignore the cases at the boundary of start and stop codon since the effect on start/stop would be evaluated anyway
      if(($mut_pos>=$start_pos+3)&($end_pos<=$check_point)){
        #snp and insertion could only be annotated here
        $flag_uORF = abs(length($ref_coding)-length($alt_coding))%3;
      }

      if($flag_uORF){
        #getting the Kozak context and Kozak strength of the start codon
        if ((($start_pos-3)>=0)&&($sequence[($start_pos+3)])){
          $current_kozak = $sequence[($start_pos-3)].$sequence[($start_pos-2)].$sequence[$start_pos-1]."ATG".$sequence[$start_pos+3];
        }
        else{
          $current_kozak = '-';
        }

        if ($current_kozak !~ /-/){
          my @split_kozak = split //, $current_kozak;
          $current_kozak_strength = 1;
          if ((($split_kozak[0] eq 'A')||($split_kozak[0] eq 'G'))&&($split_kozak[6] eq 'G')){
            $current_kozak_strength = 3;
          }
          elsif ((($split_kozak[0] eq 'A')||($split_kozak[0] eq 'G'))||($split_kozak[6] eq 'G')){
            $current_kozak_strength = 2;
          }
        }

        $uFrameshift_KozakContext=$current_kozak;
        $uFrameshift_KozakStrength=$self->{kozak_strength}{$current_kozak_strength}? $self->{kozak_strength}{$current_kozak_strength}:$current_kozak_strength;

        #the annotation of the original uORF
        my @ref_overlapping_seq = split //, $UTR_info->{seq}.$UTR_info->{cds_seq};
        my %ref_existing_oORF = %{$self->existing_uORF(\@ref_overlapping_seq)};

        if (exists($existing_uORF->{$start_pos})){ #if there is stop codon within 5'UTR
          $uFrameshift_ref_type = "uORF";
        } elsif (($utr_length-$start_pos) % 3) {
          $uFrameshift_ref_type = "OutOfFrame_oORF";
        } else{
          $uFrameshift_ref_type = "InFrame_oORF";
        }

        if (exists($ref_existing_oORF{$start_pos})){
          my @stops = sort {$a <=> $b} @{$ref_existing_oORF{$start_pos}};
          $uFrameshift_ref_type_length = $stops[0]-$start_pos+3;
        } else {
          $uFrameshift_ref_type_length = "NA";
        }

        $uFrameshift_StartDistanceToCDS = $utr_length - $start_pos;

        my @alt_overlapping_seq = split //, $mut_utr_seq.$UTR_info->{cds_seq};
        my %alt_existing_oORF = %{$self->existing_uORF(\@alt_overlapping_seq)};

        if (exists($mut_uORF->{$start_pos})){
          $uFrameshift_alt_type = "uORF";
        } elsif(($length-$start_pos)%3){
          $uFrameshift_alt_type = "OutOfFrame_oORF";
        } else {
          $uFrameshift_alt_type = "InFrame_oORF";
        }

        #get the length
        if (exists($alt_existing_oORF{$start_pos})){
          my @stops = sort {$a <=> $b} @{$alt_existing_oORF{$start_pos}};
          $uFrameshift_alt_type_length = $stops[0]-$start_pos+3;
        } else {
          $uFrameshift_alt_type_length = "NA";
        }

        #find evidence in added reference file
        $uFrameshift_evidence= (exists $self->{uORF_evidence})? $self->find_uorf_evidence($UTR_info,$chr,$start_pos): "NA";

        my %uORF_effect = (
            "uFrameShift_ref_type" => $uFrameshift_ref_type,
            "uFrameShift_ref_type_length" => $uFrameshift_ref_type_length,
            "uFrameShift_ref_StartDistanceToCDS" => $uFrameshift_StartDistanceToCDS,
            "uFrameShift_alt_type" => $uFrameshift_alt_type,
            "uFrameShift_alt_type_length" => $uFrameshift_alt_type_length,
            "uFrameShift_KozakContext" => $uFrameshift_KozakContext,
            "uFrameShift_KozakStrength" => $uFrameshift_KozakStrength,
            "uFrameShift_Evidence" => $uFrameshift_evidence,
        );

        $output_flag = $output_flag? $output_flag . "&" . "5_prime_UTR_uORF_frameshift_variant": "5_prime_UTR_uORF_frameshift_variant";
        $output_effects = $output_effects? $output_effects."&".$self->transform_hash_to_string(\%uORF_effect):$self->transform_hash_to_string(\%uORF_effect);
      }
    }
  }

  $result{'uFrameShift_flag'} = $output_flag;
  $result{'uFrameShift_effect'} = $output_effects;

  return \%result;
}

#########
# Utils #
#########

=head2 count_number_ATG

  Arg [1]    : $seq
  Description: Count the number of existing ATGs in the five prime UTR sequence.
  Return: returns the number of uORFs and the number of oORFs
  Returntype : hashref

=cut

sub count_number_ATG {
  my ($self,$seq) = @_;

  my @sequence = split //, $seq;
  my $length = @sequence;

  my @atg_pos = @{$self->get_ATG_pos(\@sequence)};
  my @mes_pos = @{$self->get_stopcodon_pos(\@sequence)};

  my $inframe_stop_num=0;
  my $outofframe_atg_num=0;
  my $inframeORF_num=0;

  foreach my $atg (@atg_pos){

    my $flag=0;

    #indicate whether there is a stop codon with respect to this ATG
    foreach my $mes (@mes_pos){
      unless (($mes-$atg) % 3 || ($mes-$atg) < 0 ){
        $flag=1;
        last;
      };
    }
    #if there is no stop codon, then look at whether it's Out_of_frame or Inframe
    if($flag==1){
      $inframe_stop_num++;
    } else {
      (($length-$atg) % 3)? $outofframe_atg_num++ : $inframeORF_num++;
    }

  }

  my %existing_uORF_num = (
    "UTRAnnotator_existing_uORFs" => $inframe_stop_num,
    "UTRAnnotator_existing_OutOfFrame_oORFs" => $outofframe_atg_num,
    "UTRAnnotator_existing_InFrame_oORFs" => $inframeORF_num,
  );

  return \%existing_uORF_num;

}


=head2 existing_uORF

  Arg [1]    : $seq
  Description: obtaining the relative coordinates of start and end pos of existing uORF in the five prime UTR sequence.
  Return: returns as the key the position of the first nucleotide of start codon and the value is all the positions of the first nucleotide of the stop codon.
  Returntype : hashref

=cut


sub existing_uORF {
  my ($self,$seq) = @_;

  my @atg_pos = @{$self->get_ATG_pos($seq)};
  my @mes_pos = @{$self->get_stopcodon_pos($seq)};
  my %uORF;

  foreach my $atg (@atg_pos){
    my @stop_pos;
    foreach my $mes (@mes_pos){
      push @stop_pos, $mes unless ((($mes-$atg) % 3) || ($mes-$atg)<0);
    }

    $uORF{$atg} = [@stop_pos] if (@stop_pos);
  }

  return \%uORF;
}


=head2 get_ATG_pos

  Arg [1]    : $seq
  Description:  get all the relative position of A in ATG of the five prime UTR sequence
  Return: returns ATG position in sequence.
  Returntype : listref

=cut


sub get_ATG_pos {
  my ($self,$seq) = @_;

  my @sequence = @{$seq};
  my $length = @sequence;
  my $seq_str=join '', @sequence;
  my @atg_pos = grep {(substr ($seq_str, $_,3) eq 'ATG')} 0..($length);

  return \@atg_pos;
}


=head2 get_stopcodon_pos

  Arg [1]    : $seq
  Description:  get all the relative position of stop codons in the five prime UTR sequence
  Return: returns stop-codon position in sequence.
  Returntype : listref

=cut


sub get_stopcodon_pos {
  my ($self,$seq) = @_;

  my @sequence = @{$seq};
  my $length = @sequence;

  my @met_pos;
  if($length){
    for (my $seq_n=0; $seq_n<$length; $seq_n++){
      if ((($sequence[$seq_n] eq 'T')&&($sequence[$seq_n+1] eq 'A')&&($sequence[$seq_n+2] eq 'A'))
          ||(($sequence[$seq_n] eq 'T')&&($sequence[$seq_n+1] eq 'A')&&($sequence[$seq_n+2] eq 'G'))
          ||(($sequence[$seq_n] eq 'T')&&($sequence[$seq_n+1] eq 'G')&&($sequence[$seq_n+2] eq 'A'))){
           push @met_pos,$seq_n;
      }
    }
  }

  return \@met_pos;
}

sub get_ref_coding {
  my ($self,$ref) = @_;

  my $ref_coding = $ref;

  if($ref_coding eq "-"){   #for insertion
    $ref_coding = "";
  }

  return $ref_coding;
}

sub get_alt_coding {

  my ($self,$alt,$strand) = @_;
  my $alt_coding = $alt;

  $alt_coding = "" if($alt_coding eq "-");

  reverse_comp(\$alt_coding) if($strand < 0);

  return $alt_coding;
}


=head2 mut_utr_sequence

  Description:  get the mutated five prime UTR sequence
  Return: Mutated UTR sequence
  Returntype : String

=cut


sub mut_utr_sequence {
  my ($self,$seq,$mut_pos,$ref_coding,$alt_coding,$strand) = @_;

  my @sequence = @{$seq};
  my $length = @sequence;

  #order from 5' to 3'
  my $mut_sequence;
  $mut_sequence = (join "", @sequence[0..$mut_pos-1]).$alt_coding.(join "", @sequence[$mut_pos+length($ref_coding)..$length-1]);

  return $mut_sequence;
}

sub transform_hash_to_string {
  my ($self,$hash) = @_;

  my %hash = %{$hash};

  my $output_str = "";
  foreach my $key (sort keys %hash){
    $output_str = $output_str.$key.":".$hash{$key}.",";
  };

  chop($output_str) if($output_str);

  return $output_str;
}

sub utr_exon_position {
  my ($self,$UTR_info) = @_;

  my @sorted_starts = @{$UTR_info->{start}};
  my @sorted_ends = @{$UTR_info->{end}};
  my $num_exons = @sorted_starts;
  my $strand = $UTR_info->{strand};
  my %chr_position;
  my $utr_position = 0;

  if ($strand == 1){
    #create a map from chromosome position to UTR position
    for (my $m=0; $m<$num_exons; $m++){
      for (my $p=$sorted_starts[$m]; $p<=$sorted_ends[$m]; $p++){
        $chr_position{$p}=$utr_position;
        $utr_position++;
      }
    }
  }

  if ($strand == -1){
    #create a map from chromosome position to UTR position
    #the exons were arranged in increasing order above
    for (my $m=$num_exons-1; $m>=0; $m--){
      for (my $p=$sorted_ends[$m]; $p>=$sorted_starts[$m]; $p--){
        $chr_position{$p}=$utr_position;
        $utr_position++;
      }
    }
  }

  return \%chr_position;
}

sub chr_position {
  my ($self,$UTR_info) = @_;

  my @sorted_starts = @{$UTR_info->{start}};
  my @sorted_ends = @{$UTR_info->{end}};
  my $num_exons = @sorted_starts;
  my $strand = $UTR_info->{strand};
  my %utr_position;
  my $utr_position = 0;

  if ($strand == 1){
  #create a map from chromosome position to UTR position
    for (my $m=0; $m<$num_exons; $m++){
      for (my $p=$sorted_starts[$m]; $p<=$sorted_ends[$m]; $p++){
        $utr_position{$utr_position}=$p;
        $utr_position++;
      }
    } 
  }

  if ($strand == -1){
    #create a map from chromosome position to UTR position
    #the exons were arranged in increasing order above
    for (my $m=$num_exons-1; $m>=0; $m--){
      for (my $p=$sorted_ends[$m]; $p>=$sorted_starts[$m]; $p--){
        $utr_position{$utr_position}=$p;
        $utr_position++;
      }
    }
  }

  return \%utr_position;
}

sub get_allele_exon_pos {
  # return the 5' start and end position at the exon of the ref allele
  my ($self, $strand, $pos, $ref_coding, $UTR_info) = @_;
  my %utr_exon_pos = %{$self->utr_exon_position($UTR_info)};

  my $ref_start;
  my $ref_end;

  if ($strand == 1) {
    $ref_start = $utr_exon_pos{$pos};

    if(length($ref_coding)){
      $ref_end = $utr_exon_pos{$pos + length($ref_coding) - 1};
    } else {
      $ref_end = $ref_start;
    }
  }
  else {
    $ref_start = $utr_exon_pos{$pos+length($ref_coding)-1};

    if(length($ref_coding)){
      $ref_end = $utr_exon_pos{$pos};
    } else {
      $ref_end = $ref_start;
    }
  }

  return ($ref_start, $ref_end);
}

sub find_uorf_evidence {
  my ($self,$UTR_info,$chr,$start_pos)=@_;

  my %utr_pos = %{$self->chr_position($UTR_info)};
  my $start_chr_pos = $utr_pos{$start_pos};

  my $query = ($chr=~/chr/i)? $chr . ":" . $start_chr_pos : "chr" . $chr . ":" . $start_chr_pos;
  my $evidence = $self->{uORF_evidence}->{$query}? "True" : "False";

  return $evidence;
}

1;

