#!/usr/bin/perl -w # Author: Justin Love (justin.love@cesgames.com) # # Developed by significant modification of a script for AVR processors, # which carried the following notice: # Copyright (c) 2003 University of Utah and the Flux Group. # All rights reserved. # # Permission to use, copy, modify, distribute, and sell this software # and its documentation is hereby granted without fee, provided that the # above copyright notice and this permission/disclaimer notice is # retained in all copies or modified versions, and that both notices # appear in supporting documentation. THE COPYRIGHT HOLDERS PROVIDE # THIS SOFTWARE "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES, # INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE COPYRIGHT # HOLDERS DISCLAIM ANY LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER # RESULTING FROM THE USE OF THIS SOFTWARE. # # Users are requested, but not required, to send to csl-dist@cs.utah.edu # any improvements that they make and grant redistribution rights to the # University of Utah. # # Author: John Regehr (regehr@cs.utah.edu) use strict; # my $ORIGIN = 0; my $ORIGIN = hex("80"); my $DATA = "_?R_INIT"; my $START = "_\@cstart"; my $MAX_ADDRESS = 0x7FFF; my $STACK_INC = "_\@cprep"; my $STACK_DEC = "_\@cdisp"; my $VEC_SIZE = 2; my $VEC_END = hex(0x24); my $VEC_COUNT = $VEC_END / $VEC_SIZE; my $CALL_SIZE = 2; my $INT_SIZE = 3; my $PUSH_SIZE = 2; my $INDIRECT = $MAX_ADDRESS * 4; my $RECURSIVE = 1; my $FLAT = 0; my %addrs; my %insns; my %args; my %labels; my %below_label; my %symbols; my %vectors; my $linenum = 0; my %lines; my %line_to_addr; my @bytes; my %depths; my %flat_function_depths; my %recursive_function_depths; my %call_graph; my %called_by_graph; my %references; sub next_addr ($) { my $addr = shift; my $line = $addrs{$addr}; return $line_to_addr{$line + 1}; } sub previous_addr ($) { my $addr = shift; my $line = $addrs{$addr}; die if ($line == 0); return $line_to_addr{$line - 1}; } sub insn_size ($) { my $addr = shift; return next_addr($addr) - $addr; } sub is_branch ($) { my $addr = shift; return ( $insns{$addr} eq "BC" || $insns{$addr} eq "BNC" || $insns{$addr} eq "BZ" || $insns{$addr} eq "BNZ" || $insns{$addr} eq "BT" || $insns{$addr} eq "BF"); } sub is_fallthrough ($) { my $addr = shift; return ( $insns{$addr} eq "MOV" || $insns{$addr} eq "XCH" || $insns{$addr} eq "MOVW" || $insns{$addr} eq "XCHW" || $insns{$addr} eq "ADD" || $insns{$addr} eq "ADDC" || $insns{$addr} eq "SUB" || $insns{$addr} eq "SUBC" || $insns{$addr} eq "AND" || $insns{$addr} eq "OR" || $insns{$addr} eq "XOR" || $insns{$addr} eq "CMP" || $insns{$addr} eq "ADDW" || $insns{$addr} eq "SUBW" || $insns{$addr} eq "CMPW" || $insns{$addr} eq "INC" || $insns{$addr} eq "INCW" || $insns{$addr} eq "DECW" || $insns{$addr} eq "ROR" || $insns{$addr} eq "ROL" || $insns{$addr} eq "RORC" || $insns{$addr} eq "ROLC" || $insns{$addr} eq "SET1" || $insns{$addr} eq "CLR1" || $insns{$addr} eq "NOT1" || $insns{$addr} eq "PUSH" || $insns{$addr} eq "POP" || $insns{$addr} eq "DBNZ" || $insns{$addr} eq "NOP" || $insns{$addr} eq "EI" || $insns{$addr} eq "DI" || $insns{$addr} eq "HALT" || $insns{$addr} eq "STOP"); } sub is_jmp ($) { my $addr = shift; return ($insns{$addr} eq "BR"); } sub is_call ($) { my $addr = shift; if (!defined($addr)) { die; } return ($insns{$addr} eq "CALL" || $insns{$addr} eq "CALLT") } sub is_return ($) { my $addr = shift; return ( $insns{$addr} eq "RET" || $insns{$addr} eq "RETI" || $insns{$addr} eq "???"); } sub word_at ($) { my $addr = shift; if ($addr >= $MAX_ADDRESS) { print "out of bounds address\n"; die; } #printf "%x: %x,%x\n", $addr, $bytes[$addr], $bytes[$addr + 1]; return $bytes[$addr] + ($bytes[$addr + 1] * 256); } sub lookup_symbol ($) { my $symbol = shift; if (!defined($symbols{$symbol})) { print "failed to resolve $symbol\n"; exit; } return $symbols{$symbol}; } sub resolve_address ($) { my $symbol = shift; if ($symbol =~ /(.*)\+0x([0-9a-f]+)/) { return lookup_symbol($1) + hex($2); } else { return lookup_symbol($1); } } sub get_target ($) { my $addr = shift; if (is_jmp ($addr) || is_call ($addr) || is_branch ($addr)) { my $code = $args{$addr}; # print "$code\n"; if ($code =~ /!(.*)/) { # print "absolute address\n"; return resolve_address($1); } elsif ($code =~ /\$(.*)/) { # print "relative address\n"; return resolve_address($1); } elsif ($code =~ /\[(.*)\]/) { # print "indirect address\n"; return word_at(resolve_address($1)); } elsif ($code eq "AX") { # printf "%x: indirect call, ignoring\n", $addr; return $INDIRECT; } else { print "unable to recognize address $code\n"; exit; } } die; } sub get_target_name ($) { my $addr = shift; if (is_jmp ($addr) || is_call ($addr) || is_branch ($addr)) { my $code = $args{$addr}; # print "$code\n"; if ($code =~ /[!\$](.*)\+0x([0-9a-f]+)/) { return $1; } elsif ($code =~ /[!\$](.*)/) { return $1; } elsif ($code =~ /\[(.*)\]/) { return $1; } elsif ($code eq "AX") { return "INDIRECT"; } else { print "unable to recognize address $code\n"; exit; } } die; } sub get_literal ($) { my $addr = shift; my $code = $args{$addr}; if ($code =~ /#([0-9A-H])*H/) { return hex($1); } else { return "NONE"; } } sub disassemble ($) { my $fn = shift; my $current_label = "TOP"; open INF, $fn or die "can't open input file $fn"; while (my $line = ) { chomp $line; $linenum++; $lines{$linenum} = $line; # skip first few lines, they're junk next if ($linenum <= 1); # skip blank lines next if ($line eq ""); # kill comments ($line =~ s/\s*;.*$//); # print "$line\n"; if ($line =~ /[^0-9A-F]*([0-9A-F]+) ([^\s]*)\s+([0-9A-F]+)\s+(.*)$/) { my $addr = hex($1); my $label = $2; my $data = $3; my $code = $4; if ($label) { if (defined($labels{$addr})) { $current_label = $labels{$addr}; } else { $current_label = $label; } # $labels{$addr} = $label; # $symbols{$label} = $addr; } else { } $below_label{$addr} = $current_label; my $address = $addr; while ($data =~ /([0-9A-F]{2})/g) { $bytes[$address] = hex($1); $address = $address + 1; } $line_to_addr{$linenum} = $addr; if ($code eq "???") { $insns{$addr} = $code; $args{$addr} = ""; $addrs{$addr} = $linenum; next; } die if (!($code =~ /^([\.a-zA-Z]+)\s*(.*)?$/)); my $insn = $1; my $arg = $2; $insns{$addr} = $insn; $args{$addr} = $arg; $addrs{$addr} = $linenum; next; } # paranoid: don't ignore lines that look funny print "oops -- can't understand $linenum: '$line'\n"; exit; } #print "parsed:\n"; #print " ".scalar(keys %insns)." instructions\n"; close INF; } sub parse_symbols { my $fn = shift; open INF, $fn or die "can't open input file $fn"; while (my $line = ) { chomp $line; if ($line =~ /01([0-9A-F]{4})(.*)/) { my $addr = hex($1); my $symbol = $2; if ($symbol ne "_?R_INIS") { $labels{$addr} = $symbol; $symbols{$symbol} = $addr; } } } #print "parsed:\n"; #print " ".scalar(keys %labels)." labels\n"; close INF; } sub max ($$) { (my $a, my $b) = @_; if (!defined ($a)) { return $b; } if (!defined ($b)) { return $a; } if ($a > $b) { return $a; } else { return $b; } } sub stack_effect ($) { my $addr = shift; return $PUSH_SIZE if ($insns{$addr} eq "PUSH"); return -$PUSH_SIZE if ($insns{$addr} eq "POP"); if (is_call($addr)) { my $previous = previous_addr($addr); my $literal = get_literal($previous); if ($literal ne "NONE") { my $target = get_target_name($addr); if ($target eq $STACK_INC) { return $literal; } elsif ($target eq $STACK_DEC) { return -$literal; } } } return 0; } sub compute_stack { # $addr is the address of the current instruction # $line_depths is a hash from addresses to max stack depth # $old_depth is the stack depth before executing this instruction (my $addr, my $line_depths, my $old_depth, my $traversal) = @_; #printf "addr = %x, old_depth = %d\n", $addr, $old_depth; die if (!defined $addr); die if (!defined $line_depths); die if (!defined $old_depth); die if (!defined $traversal); if (!defined($insns{$addr})) { printf "hmmm: we don't have an instruction at address %x\n", $addr; exit; } # compute new depth my $new_depth = $old_depth + stack_effect ($addr); # termination condition 1 if (defined($line_depths->{$addr}) && $line_depths->{$addr} >= $new_depth) { # print "smaller depth\n"; return; } # if ($new_depth != $old_depth) { # printf "addr = %x, %d -> %d, insn = %s\n", $addr, $old_depth, $new_depth, $insns{$addr}; # } # record new depth $line_depths->{$addr} = $new_depth; # termination condition 2 -- jump to origin resets the program if (is_jmp ($addr) && get_target ($addr) == $ORIGIN) { print "jump to orign\n"; return; } # termination condition 3 -- ret and reti don't go anywhere in our simple model if (is_return($addr)) { # printf "addr = %x, old_depth = %d, insn = %s\n", $addr, $old_depth, $insns{$addr}; # print "return\n"; return; } # don't record call depths on the stack-adusting routines if ($new_depth == $old_depth) { if (is_call($addr) || is_branch($addr) || is_jmp($addr)) { my $context = $below_label{$addr}; my $target = get_target_name($addr); if ($target ne $context) { my $depth; $depth = $call_graph{$context}{$target}; if (defined($depth)) { $call_graph{$context}{$target} = max($old_depth, $depth); } else { $call_graph{$context}{$target} = $old_depth; } #print "$context:$target $call_graph{$context}{$target}\n"; } } } if ($traversal == $RECURSIVE && is_call ($addr)) { compute_stack (get_target ($addr), $line_depths, $new_depth + $CALL_SIZE, $traversal); } elsif (is_branch ($addr)) { compute_stack (get_target ($addr), $line_depths, $new_depth, $traversal); } elsif (is_jmp ($addr)) { my $target = get_target($addr); if ($target == $INDIRECT) { $new_depth -= $CALL_SIZE; # the return } elsif ($target == $addr) { # infinite loop; terminate #print "infinite loop\n"; return; } else { compute_stack (get_target ($addr), $line_depths, $new_depth, $traversal); return; } } compute_stack (next_addr($addr), $line_depths, $new_depth, $traversal); } sub set_vector ($) { my $vec = shift; my $word = word_at($vec); printf "vec: %x, word: %x\n", $vec, $word; $vectors{$vec} = $word; } sub vector_report { my %vec_stack; #add in any of your interrupt routines here set_vector(0x00); # reset set_vector(0x12); # WT set_vector(0x14); # WTI foreach my $vec (sort bynum keys %vectors) { my $init_stack; if ($vec eq "0") { $init_stack = 0; } else { $init_stack = $INT_SIZE; } $depths{$vec}{0} = 0; #create the table so we can get a reference to it compute_stack ($vectors{$vec}, $depths{$vec}, $init_stack, $RECURSIVE); my $depth = 0; foreach my $addr (keys %addrs) { if (defined ($depths{$vec}{$addr})) { $depth = max ($depth, $depths{$vec}{$addr}); } } $vec_stack{$vec} = $depth; printf "vector %x: %d(0x%x)\n", $vec, $depth, $depth; } } sub label_report ($) { my $traversal = shift; foreach my $label (keys %symbols) { my $init_stack; my %line_depths; my $address = $symbols{$label}; #print "$label: $address\n"; if ($address >= $symbols{$DATA}) { # print " data\n"; next; } elsif ($address < $ORIGIN) { # print " < origin\n"; if ($address > $VEC_END) { $address = word_at($address); # print " change to $address\n"; } else { # print " a vector\n"; next; } } if ($address eq $symbols{$START}) { $init_stack = 0; } else { $init_stack = $CALL_SIZE; } #print "symbol: ", $label, "\n"; compute_stack ($address, \%line_depths, $init_stack, $traversal); my $depth = 0; foreach my $addr (keys %line_depths) { if (defined ($line_depths{$addr})) { $depth = max ($depth, $line_depths{$addr}); } } if ($traversal == $RECURSIVE) { $recursive_function_depths{$label} = $depth; } else { $flat_function_depths{$label} = $depth; } # printf "********* label: %s: %d(0x%x)\n", $label, $depth, $depth; } } sub bynum { return $a <=> $b; } sub byreverse { return 0 - bynum; } sub dump_code { foreach my $linenum (sort bynum keys %lines) { my $addr = $line_to_addr{$linenum}; if (defined ($addr)) { my $depth; foreach my $vec (sort bynum keys %vectors) { $depth = max ($depth, $depths{$vec}{$addr}); } if (defined ($depth)) { print "$depth "; print "$lines{$linenum}\n"; } } } } sub dump_symbols { foreach my $symbol (keys %symbols) { print "$symbols{$symbol}: $symbol\n"; } } sub dump_function { # $function is the symbol to print # $indent is number of tabs (my $function, my $indent) = @_; while ($indent-- > 0) { print "\t"; } if (!defined($flat_function_depths{$function})) { print $function, "\n"; } else { printf "%s: %d/%d\n", $function, $flat_function_depths{$function}, $recursive_function_depths{$function}; } } sub graph_function { # $function is the symbol to print # $indent is number of tabs # $data is a reference to a hash (my $function, my $indent, my $data) = @_; if ($indent == 0) { print "\n"; } if (!defined($flat_function_depths{$function})) { print "$function "; } elsif (defined($data)) { printf "%s[%d] ", $function, $data->{$function}; } else { printf "%s(%d/%d) ", $function, $flat_function_depths{$function}, $recursive_function_depths{$function}; } } sub bare_function { # $function is the symbol to print # $indent is number of tabs # $data is a reference to a hash (my $function, my $indent, my $data) = @_; if ($indent == 0) { print "\n"; } print "$function"; if (defined($flat_function_depths{$function})) { printf "(%d/%d)", $flat_function_depths{$function}, $recursive_function_depths{$function}; } print " "; if (defined($data)) { printf "[%d] ", $data->{$function}; } } sub map_graph { # $graph is the 2D hash table to operate on # $operation is the function to apply to it (my $graph, my $operation) = @_; foreach my $symbol (keys %{$graph}) { &$operation($symbol, 0); foreach my $target (keys(%{$graph->{$symbol}})) { &$operation($target, 1, $graph->{$symbol}); } } foreach my $symbol (keys %flat_function_depths) { if (!defined ($graph->{$symbol})) { &$operation($symbol, 0); } } } sub recurse_graph { (my $graph, my $operation, my $symbol) = @_; if (keys(%{$graph->{$symbol}}) == 0) { return; } &$operation($symbol, 0); foreach my $target (keys(%{$graph->{$symbol}})) { &$operation($target, 1, $graph->{$symbol}); } foreach my $target (keys(%{$graph->{$symbol}})) { recurse_graph($graph, $operation, $target); } } sub invert_graph { (my $source, my $target) = @_; foreach my $a (keys %{$source}) { foreach my $b (keys(%{$source->{$a}})) { $target->{$b}{$a} = $source->{$a}{$b}; } } } sub find_references ($) { my $from = shift; my $addr = $symbols{$from}; if (!$addr) { print "did not find $from\n"; exit; } do { my $word = word_at($addr); my $label = $labels{$word}; if (defined($label)) { if (defined($flat_function_depths{$label})) { $references{$from}{$labels{$word}} = $addr; } } } until (++$addr >= $MAX_ADDRESS || defined($labels{$addr})); } sub map_references ($) { my $operation = shift; foreach my $source (keys %references) { &$operation($source, 0); foreach my $target (keys(%{$references{$source}})) { &$operation($target, 1); } } &$operation("INDIRECT", 0); foreach my $source (keys %references) { &$operation($source, 1); } } sub do_references { # Here there should be a list of calls to find_references() # with tables that contain function pointers #find_references("_function_table"); map_references(\&graph_function); } ########################## main() ############################## if (scalar(@ARGV) < 1) { print "usage: perl stack.pl file.dis [file.sym]\n"; exit; } # The symbol table is produced by the complier. # The disassebled view by itself cuts off the labels at 15 characters... if (scalar(@ARGV) >= 2) { parse_symbols($ARGV[1]); #dump_symbols(); } # To get the imput format, I went into to the LCE IDE, opened an # assembly view, and then choose save as. # Because of the optomistic way in which the dissassembler attempts # to find the variable length instructions memory, I had to make a # few manual edits to recover labels. This may have been made redundant # by bringing in the symbol file. disassemble ($ARGV[0]); #Gives main and each interrupt maximum extent; but doesn't tell # you anything about where those occur #vector_report(); #label_report builds up the database of stack exent starting from each # label(i.e. function, when looking at C compiler output) #The flat run gets the stack adjustments made in the function itself, # and the recursive call gets the total reachable. label_report($FLAT); label_report($RECURSIVE); #Dump the call graph in a format suitable for TGWikiBrowser # http://www.touchgraph.com map_graph(\%call_graph, \&bare_function); # The called by graph lets you explore it from the opposite direction; # mostly usefull for the slightly different way that the [] stack # call depths are linked into the graph #invert_graph(\%call_graph, \%called_by_graph); #map_graph(\%called_by_graph, \&bare_function); #Add information for function pointer calls (BR AX) # very primitive, user will need to determine which ones could # actually be called from any given point. do_references(); #compute_stack($ORIGIN, \%depths{0}, 0, $RECURSIVE); #dump_code ();