#!/usr/bin/perl

##########################################################################
#
# PIC 16C84, 16F84, 16F627, 16F628 (and related) disassembler
#
#     (c) 2004-2009 Jorj Bauer, <jorj@seas.upenn.edu>
#
# Placed in the public domain. Free for any (including commercial) use.
#
# Please be courteous and give me credit where credit may be due.
#
##########################################################################

use strict;
use warnings;
use Getopt::Std;
use GraphViz;
use Data::Dumper;

our ($opt_a, $opt_b, $opt_d, $opt_D, $opt_i, $opt_r, $opt_g, $opt_m, 
     $opt_l, $opt_s, $opt_S);

our @depths;     # depth of all memory locations we've tested so far
our @avenues;    # Avenues contain CP, pc, depth
our @stacks;     # Stack traces for deepest calls to functions

# Default options for all graph nodes.
my %graphopts = ( fontname => 'Times-Roman',
		  labelfontname => 'Times-Roman',
		  fontsize => 12,
		  labelfontsize => 10,
		  );

my $version = '1.09';

# List of microcontroller instructions. The arrays are information about 
# how to disassemble the instruction:
#   [0] is the command (sans arguments);
#   [1] is the mask (to derrive the command or the arguments),
#   [2] is the type of operand. (Note that this could probably be derrived
#       from the mask, but I've decided to include it here in order to 
#       provide some more flexibility if a future modification to this 
#       program makes it harder to derrive these from the mask.)
#       The values are:
#           0 means it's a standalone opcode,
#           F means it has a file register,
#           FB means it has a file, bitnumber,
#           FD means it has a file, destination,
#           8K means it has an 8-bit k (literal) value,
#           11K means it has an 11-bit k (literal) value

my %instructions = ( 
		     'CLRW'   => [ 0x0100, 0xff80, 0 ],
		     'CLRWDT' => [ 0x0064, 0xffff, 0 ],
		     'NOP'    => [ 0x0000, 0xff9f, 0 ],
		     'OPTION' => [ 0x0062, 0xffff, 0 ],
		     'RETFIE' => [ 0x0009, 0xffff, 0 ],
		     'RETURN' => [ 0x0008, 0xffff, 0 ],
		     'SLEEP'  => [ 0x0063, 0xffff, 0 ],

		     'CLRF'   => [ 0x0180, 0xff80, 'F' ],
		     'MOVWF'  => [ 0x0080, 0xff80, 'F' ],

		     'BCF'    => [ 0x1000, 0xfc00, 'FB' ],
		     'BTFSC'  => [ 0x1800, 0xfc00, 'FB' ],
		     'BTFSS'  => [ 0x1c00, 0xfc00, 'FB' ],
		     'BSF'    => [ 0x1400, 0xfc00, 'FB' ],

		     'ADDWF'  => [ 0x0700, 0xff00, 'FD' ],
		     'ANDWF'  => [ 0x0500, 0xff00, 'FD' ],
		     'COMF'   => [ 0x0900, 0xff00, 'FD' ],
		     'DECF'   => [ 0x0300, 0xff00, 'FD' ],
		     'DECFSZ' => [ 0x0b00, 0xff00, 'FD' ],
		     'INCF'   => [ 0x0a00, 0xff00, 'FD' ],
		     'INCFSZ' => [ 0x0f00, 0xff00, 'FD' ],
		     'IORWF'  => [ 0x0400, 0xff00, 'FD' ],
		     'MOVF'   => [ 0x0800, 0xff00, 'FD' ],
		     'RLF'    => [ 0x0d00, 0xff00, 'FD' ],
		     'RRF'    => [ 0x0c00, 0xff00, 'FD' ],
		     'SUBWF'  => [ 0x0200, 0xff00, 'FD' ],
		     'SWAPF'  => [ 0x0e00, 0xff00, 'FD' ],
		     'XORWF'  => [ 0x0600, 0xff00, 'FD' ],

		     'ADDLW'  => [ 0x3e00, 0xfe00, '8K' ],
		     'ANDLW'  => [ 0x3900, 0xff00, '8K' ],
		     'IORLW'  => [ 0x3800, 0xff00, '8K' ],
		     'MOVLW'  => [ 0x3000, 0xfc00, '8K' ],
		     'RETLW'  => [ 0x3400, 0xfc00, '8K' ],
		     'SUBLW'  => [ 0x3c00, 0xfe00, '8K' ],
		     'XORLW'  => [ 0x3a00, 0xff00, '8K' ],

		     'CALL'   => [ 0x2000, 0xf800, '11K' ],
		     'GOTO'   => [ 0x2800, 0xf800, '11K' ],
		     );

# Names of the basic file registers on the 16C84.
# Theoretically, user-defined file register variables could be named and 
# mapped into this space. Sounds like a future enhancement...

# These registers come from the PIC16f877A documentation. I think they'll be 
# correct for other PICs (i.e. a superset), but...

my %registers = ( 0x00 => 'INDF',
		  0x01 => 'TMR0',
		  0x02 => 'PCL',
		  0x03 => 'STATUS',
		  0x04 => 'FSR',
		  0x05 => 'PORTA',
		  0x06 => 'PORTB',
		  0x08 => 'EEDATA',
		  0x09 => 'EEADR',
		  0x0A => 'PCLATH',
		  0x0B => 'INTCON',
		  0x0C => 'PIR1',
		  0x0D => 'PIR2',
		  0x0E => 'TMR1L',
		  0x0F => 'TMR1H',
		  0x10 => 'T1CON',
		  0x11 => 'TMR2',
		  0x12 => 'T2CON',
		  0x13 => 'SSPBUF',
		  0x14 => 'SSPCON',
		  0x15 => 'CCPR1L',
		  0x16 => 'CCPR1H',
		  0x17 => 'CCP1CON',
		  0x18 => 'RCSTA',
		  0x19 => 'TXREG',
		  0x1A => 'RCREG',
		  0x1B => 'CCPR2L',
		  0x1C => 'CCPR2H',
		  0x1D => 'CCP2CON',
		  0x1E => 'ADRESH',
		  0x1F => 'ADCON0',

		  0x80 => 'INDF',
		  0x81 => 'OPTION',
		  0x82 => 'PCL',
		  0x83 => 'STATUS',
		  0x84 => 'FSR',
		  0x85 => 'TRISA',
		  0x86 => 'TRISB',
		  0x88 => 'EECON1',
		  0x89 => 'EECON2',
		  0x8A => 'PCLATH',
		  0x8B => 'INTCON',
		  0x8C => 'PIE1',
		  0x8D => 'PIE2',
		  0x8E => 'PCON',
		  
		  0x91 => 'SSPCON2',
		  0x92 => 'PR2',
		  0x93 => 'SSPADD',
		  0x94 => 'SSPSTAT',

		  0x98 => 'TXSTA',
		  0x99 => 'SPBRG',
		  
		  0x9C => 'CMCON',
		  0x9D => 'CVRCON',
		  0x9E => 'ADRESL',
		  0x9F => 'ADCON1',

		  0x100 => 'INDF',
		  0x101 => 'TMR0',
		  0x102 => 'PCL',
		  0x103 => 'STATUS',
		  0x104 => 'FSR',

		  0x106 => 'PORTB',

		  0x10A => 'PCLATH',
		  0x10B => 'INTCON',
		  0x10C => 'EEDATA',
		  0x10D => 'EEADR',
		  0x10E => 'EEDATH',
		  0x10F => 'EEADRH',

		  0x180 => 'INDF',
		  0x181 => 'OPTION',
		  0x182 => 'PCL',
		  0x183 => 'STATUS',
		  0x184 => 'FSR',

		  0x186 => 'TRISB',

		  0x18A => 'PCLATH',
		  0x18B => 'INTCON',
		  0x18C => 'EECON1',
		  0x18D => 'EECON2',
		  
		  );

# Names of the bits of the common registers.
my %bits = ( 'STATUS' => [ 'C', 'DC', 'Z', '/PD', '/TO', 'RP0', 'RP1', 'IRP' ],
	     'PORTA'  => [ 'RA0', 'RA1', 'RA2', 'RA3', 'RA4/T0CKI', 5, 6, 7 ],
	     'PORTB'  => [ 'RB0', 'RB1', 'RB2', 'RB3', 'RB4', 'RB5', 'RB6',
			   'RB7' ],
	     'INTCON' => [ 'RBIF', 'INTF', 'T0IF', 'RBIE', 'INTE', 'T0IE',
			   'EEIE', 'GIE' ],
	     'OPTION' => [ 'PS0', 'PS1', 'PS2', 'PSA', 'T0SE', 'T0CS', 
			   'INTEDG', '/RBPU' ],
	     'EECON1' => [ 'RD', 'WR', 'WREN', 'WRERR', 'EEIF', 5, 6, 7 ],
	     );
	     
# Labels for well-known addresses. Again, the users' subroutine labels could
# be mapped into this array.
my %labels = ( 0x0000 => 'RESETVEC',
	       0x0004 => 'INTRVEC',
	       0x2007 => 'CONFIG',
	       0x2100 => 'EEPROM',
	       );

# Memory locations that shouldn't be disassembled, but just printed as 'dw ...'
# 0x2007 is the configuration bits, and 0x2100+ are the EEPROM data.
# Note that the DWs and max_program_space should probably be combined, and 
# turned into something less processor-dependent. FIXME (eventually).
my @DWs = ( 0x2007, 0x2100..0x2200 );
my $MAX_PROGRAM_SPACE = 0x2000;

# @memory contains the entire program.
our @memory;

# %mask is a mapping of whether or not we want to disassemble any particular
# range of bytecode. It's populated as the file is read in.
my %mask;

# $codesize is the index of the highest word of code we've read in. This could
# be gotten to by looking at the size of @memory, but at some point maybe I'll
# come up with a more efficient way to deal with the program... and then it 
# will be important to know what the size of the code is.
my $codesize = 0;

# We try to keep track of the state of the RP0 bit (bank state), so that we
# can show the file register being dealt with more accurately. This isn't
# going to be perfect, but it'll be a start.
my $RP = 0;

# We also try to keep track of the state of PCLATH<4:3>, code-page state.
my $CP = 0;

getopts('b:dD:i:m:r:alg:sS:');

usage() if (! ($opt_b || $opt_i));

make_opcodes_lowercase() if ($opt_l);

if ($opt_b) {
    $codesize = read_binary($opt_b);
} elsif ($opt_i) {
    $codesize = read_intelhex($opt_i);
} else {
    usage();
}

if ($opt_r) {
    load_register_file($opt_r);
}

if ($opt_m) {
    load_map_file($opt_m);
}

my $graph;
if ($opt_g) {
    $graph = GraphViz->new();
}

# First pass: parse each instruction and determine if it refers to another 
# chunk of memory. If it does, we'll create a label for that chunk of memory.
# Note that we have to keep track of $RP and $CP to do this properly.
my $address = 0;
my $last_label;
do {
    my $word = $memory[$address] || 0;

    my $opcode = find_opcode($word);

    # Setup these three variables to track $CP and $RP
    my $register = ($word & 0x7f) + ($RP << 7);
    my $bitnum = ($word >> 7) & 7;
    my $registername = make_regname($register);

    # For bit clear and set, we'll watch for flips of RP[01] and PCLATH[3:4].
    check_pages($opcode, $registername, $bitnum)
	if ($opcode =~ /^b[sc]f/i);
    
    if (uc($opcode) eq 'CALL' || uc($opcode) eq 'GOTO') {
	# Find the referenced address and label it if it's not already labelled

        my $target = ($word & 0x7FF) + ($CP << 11);
	my $label = make_label($target);

	if ($label !~ /^L[0-9A-Fx]+/) {
	    $last_label = $label;
	} else {
	    if ($opt_g) {
		# Anonymous labels have an implicit edge from the last 
		# non-anonymous label.
		$graph->add_edge($last_label => $label,
				 dir => 'forward',
				 style => 'dotted',
				 color => 'black',
				 %graphopts,
				 );
	    }
	}
	if ($opt_g) {
	    # graphing, so add it
	    if (!$labels{$target}) {
		$labels{$target} = $label;
		$graph->add_node($label,
				 %graphopts,
				 );
	    }
	    if (!$labels{$address} && !$opt_s) {
		# (don't add anon labels if we're in simple label mode.)
		$labels{$address} = make_label($address);
		$graph->add_node($labels{$address},
				 %graphopts,
				 );
	    }

	    # Add a line for the call to the subroutine or goto statement.
	    my $from = $labels{$address};
	    $from = minimal_labels(\%labels, $address)
		if ($opt_s);

	    if (is_goto($address)) {
		$graph->add_edge($from => $label,
				 %graphopts,
				 dir => 'forward',
				 style => 'solid',
				 color => 'black',
				 );
	    } else {
		$graph->add_edge($from => $label,
				 dir => 'both',
				 style => 'dashed',
				 color => 'red',
				 %graphopts,
				 );
	    }

	} else {
	    # not graphing, so it's simple...
	    $labels{$target} = $label unless ($labels{$target});
	}
    }

    $address++;
} while ($address < $codesize);

# Add graph edges for all of the postponed fallthrough labels.
if ($opt_g) {
    # we only want to "fall through" (create continuance labels) if the 
    # previous instruction wasn't an unconditional branch (i.e. if there's
    # no interruption in the program flow).

    foreach my $i (sort {$b <=> $a} keys(%labels)) {
	next if ($i >= $MAX_PROGRAM_SPACE || $i <= 5);

	# Skip anon labels if simple labels are enabled
	next if ($opt_s && $labels{$i} =~ /^L[0-9A-Fx]+/);

	my $from;
	if ($opt_s) {
	    $from = minimal_labels(\%labels, $i);
	    next if $from eq $labels{$i};
	} else {
	    $from = find_previous_label(\%labels, $i);
	}

	$graph->add_edge($from => $labels{$i},
			 dir => 'forward',
			 style => 'dotted',
			 color => 'magenta',
			 %graphopts)
	    unless (unconditional_branch_before($i));
    }

}

perform_call_depth_analysis() if ($opt_d);

# Second pass: go back through and print the disassembly.
$RP = 0;
$CP = 0;
$address = 0;
my $insegment = 0; # if 0, we need to print an 'ORG'
do {
    if ($mask{$address}) {
	if (!$insegment) {
	    print sprintf("\n%s%-16.16s %-8.8s 0x%.4X\n\n",
			  format_address($address), 
			  "", 
			  $opt_l?"org":"ORG",
			  $address);
	    $insegment = 1;
	}

	my $word = $memory[$address] || 0;
	
	my $disassembly = disassemble_instruction($address, $word);
        my $opcode = find_opcode($word);

        my $comment = '';
        if ($RP != 0) {
           $comment = '; NOTE: register bank ' . $RP;
        }
        if ($CP != codepage($address)) {
           if ($opcode =~ /^call$/i || $opcode =~ /^goto$/i) {
               if (!$comment) {
                   $comment = '; NOTE: cross-page: ' . codepage($address). '=>' . $CP;
               } else {
                   $comment .= ' and CP ' . codepage($address) . '=>' . $CP;
               }
           }
        }
        if ($opt_d) {
           # add call-depth analysis to comments
           if (defined($depths[$address])) {
              if ($comment) {
                 $comment .= ' depth ' . $depths[$address];
              } else {
                 $comment = '; depth ' . $depths[$address];
              }
              if ( $depths[$address] >= $opt_D ) {
                  $comment .= ' - stack: ' . join(' ', @{$stacks[$address]});
              }
           } else {
              $comment .= '; DEAD CODE?';
           }
        }
        if ($comment) {
           $disassembly = sprintf("%-55.55s %s",
                                  $disassembly,
                                  $comment);
        }

        print $disassembly, "\n";

	# Make the output pretty. If it was a RETFIE / RETURN / GOTO et al and
	# the next instruction has a label, we'll print an extra newline.
	$opcode = uc(find_opcode($word));

	if ( ($opcode eq 'GOTO' || $opcode eq 'RETURN' || 
	      $opcode eq 'RETFIE' || $opcode eq 'RETLW')
	     &&
	     defined $labels{$address+1} ) {
	    print "\n";
	}

    } else {
	$insegment = 0;
    }

    $address++;
} while ($address < $codesize);

# If graphing, save the graph
if ($opt_g) {
  open(GIF, ">", $opt_g) || die "Unable to open '$opt_g' for writing: $!";
  binmode GIF;
  print GIF $graph->as_gif();
  close GIF;
}

exit (0);

sub perform_call_depth_analysis {
    my $pc = 0; # program counter
    $CP = 0; # code page bits

    # Start with two avenues: one @ 0, and one @ 4
    add_avenue(0, 0, []); # mem location 0, starting with depth=0
    add_avenue(4, 1, ['INTERRUPT']); # Interrupts, by definition, must be 1 deep

    # Loop as long as we have avenues to examine.
    while (scalar @avenues) {
	my $avenue = pop @avenues;
	examine_avenue($avenue);
    }

    # Dump the results
#    $address = 0;
#    do { 
#	if ( defined $depths[$address] ) {
#	    print format_address($address), " depth $depths[$address]\n";
#	}
#	$address++;
#    } while ($address < $codesize);
}

sub add_avenue {
    my ($where, $depth, $stack) = @_;

    my $CP = codepage($where);

    die("Something is very wrong with the call depth analysis. Currently examining\ndepth $depth, which seems far too deep. Stack dump: " . Dumper($stack))
	if ($depth >= 100);

    # Only examine this avenue if we haven't already calculated the depth
    # of that part of code, or if the depth we calculated for that code is 
    # less than our starting value.

    my $label = $labels{$where} || 'anonymous';

    return
	if (defined($depths[$where]) && $depths[$where] >= $depth);

    # See if there are any avenues pending for this branch. If so, modify 
    # them with the maximum depth rather than adding new avenues to traverse

    foreach my $i (@avenues) {
	if ($i->{pc} == $where) {
	    if ($i->{depth} < $depth) {
		$i->{depth} = $depth;
		$i->{stack} = $stack;
	    }
	    return;
	}
    }

    my $a = { pc => $where,
	      depth => $depth,
	      stack => $stack };
    push (@avenues, $a);
}

sub clone_stack {
    my ($stack) = @_;
    my @ret;

    foreach my $i (@$stack) {
	push(@ret, $i);
    }

    return @ret;
}

sub examine_avenue {
    my ($avenue) = @_;

    my $pc = $avenue->{pc};
    $CP = codepage($pc);
    $RP = 0; # not really important for this analysis
    my $depth = $avenue->{depth};
    my @stack = clone_stack($avenue->{stack});

    my $label = $labels{$pc};
    if (!$label) {
	$label = "anonymous_" . format_address($pc);
    }

    do {
	# $mask is true for a given address if we think it's disassemblable.
	if ($mask{$pc}) {
	    my $word = $memory[$pc] || 0;
	    
	    my $disassembly = disassemble_instruction($pc, $word);
	    my $opcode = find_opcode($word);
	    
	    # For bit clear and set, we'll watch for flips of RP[01] and
	    # PCLATH[3:4].   
	    my $bitnum = ($word >> 7) & 7;
	    my $register = ($word & 0x7f) + ($RP << 7);
	    my $registername = make_regname($register);
	    check_pages($opcode, $registername, $bitnum)
		if ($opcode =~ /^b[sc]f/i);

	    # Follow the execution path. We want to note how deep a call goes
	    # for every 'call' in the program. Once we know the depth for any
	    # given spot, we don't have to recalculate the depth at that 
	    # point again. So start at the beginning, find a call, calculate 
	    # the depth of that call, add 1, and mark this spot; then mark 
	    # all of the code from that point forward, until we hit a return
	    # without a branch around it, at that same depth.

	    # Note that this doesn't track branches executed via pcl changes.
	    # But simple tables should be unaffected by this limitation, since
	    # they're followed by a retlw. More complicated dispatch tables 
	    # would break this feature horribly though.

	    if ((!defined($depths[$pc]) || $depth > $depths[$pc])) {
		$depths[$pc] = $depth;
		my @clone = clone_stack(\@stack);
		$stacks[$pc] = \@clone;
	    }

	    # See if this is a branch - if so, queue a test of that
	    # branch at the current depth (+1 if it's a call). And if
	    # not, we'll just continue.

	    if ( $opcode =~ /call/i ) {
		my $target = ($word & 0x7FF) + ($CP << 11);
		add_avenue($target, $depth + 1, [@stack, $labels{$target}]);
	    } elsif ( $opcode =~ /goto/i ) {
		my $target = ($word & 0x7FF) + ($CP << 11);
		add_avenue($target, $depth, \@stack);
		return;
	    }
	    elsif ( $opcode =~ /btfs/i || $opcode =~ /incfsz/i || $opcode =~ /decfsz/i ) {
		my $target = $pc + 2; # already takes $CP into account
		add_avenue($target, $depth, \@stack); # don't add depth
	    }
	    elsif ( $opcode =~ /return/i || $opcode =~ /retlw/i || 
		    $opcode =~ /retfie/i ) {
		# And we're done with this avenue.
		return;
	    }

	}

	$pc++;
    } while ($pc < $codesize);
    # Either exited by running out of memory or by a return opcode.
}


sub find_previous_label {
    my ($labelsref, $address) = @_;

    foreach my $i (sort {$b <=> $a} keys %$labelsref) {
	return $labelsref->{$i}
	    if ($i < $address);
    }
    return 'unknown';
}

# find the first prior non-anonymous label and return it.
sub minimal_labels {
    my ($labelsref, $address) = @_;

    while ($address > 0) {
	my $label = find_previous_label($labelsref, $address);
	return $label
	    if ($label !~ /^L[0-9A-Fx]+$/);
	$address--; # FIXME: not ideal. Better to find the addr of the label
    }
    return; # undef
}

# return true if there's an unconditional branch just before this address.
# (used to compute flow diagrams.)
sub unconditional_branch_before {
    my ($address) = @_;

    # Is the previous instruction a return/goto/retfie/retlw?
    my $word = $memory[$address-1] || 0;
    my $opcode = uc(find_opcode($word));
    if ( ($opcode eq 'GOTO' || $opcode eq 'RETURN' || 
	  $opcode eq 'RETFIE' || $opcode eq 'RETLW') ) {
	# Check out the opcode before that and see if it's a skip
	$word = $memory[$address-2] || 0;
	$opcode = uc(find_opcode($word));

	return 1
	    if ($address == 1); # Can't check for the opcode before...

	if ($opcode eq 'BTFSS' || $opcode eq 'BTFSC' || $opcode eq 'DECFSZ' || $opcode eq 'INCFSZ') {
	    # Conditional skip; return 0.
	    return 0;
	} else {
	    # There's no way around it. The previous statement returned.
	    return 1;
	}
    }

    # If we get here, there was either no branch before this statement, or 
    # there was a branch that was conditional.
    return 0;
}

# return true if the statement at the given address is a GOTO (not a CALL)
# (used to compute flow diagrams.)
sub is_goto {
    my ($address) = @_;
    my $word = $memory[$address] || 0;
    my $opcode = uc(find_opcode($word));
    return ($opcode eq 'GOTO');
}

sub find_opcode {
    my ($word) = @_;

    foreach my $opcode (keys(%instructions)) {
	my $info = $instructions{$opcode};
	if (($word & $info->[1]) == $info->[0]) {
	    return $opcode;
	}
    }

    return "???";
}

sub get_type {
    my ($opcode) = @_;

    return $instructions{$opcode}->[2];
}

sub disassemble_instruction {
    my ($address, $word) = @_;

    return sprintf("%s%-16.16s %-8.8s 0x%.2X",
		   format_address($address),
		   $labels{$address},
		   $opt_l?'dw':'DW',
		   $word)
	if (is_dwaddr($address));
    
    my $opcode = find_opcode($word);
    my $type = get_type($opcode);
    my $base = sprintf("%s%-16.16s %-8.8s", 
		       format_address($address), 
		       $labels{$address} || '',
		       $opcode,
                       );

    if (!$type) {
	return $base;
    } elsif ($type eq 'F') {
	# If the RP0 flag is set for page 1, then add 0x80
	my $register = $word & 0x7F;
        $register += ($RP << 7);

	return sprintf("%s %s", $base, make_regname($register));
    } elsif ($type eq 'FD') {
	# If the RP0 flag is set for page 1, then add 0x80
	my $register = $word & 0x7F;
	$register += ($RP << 7);

	return sprintf("%s %s, %s", 
		       $base, 
		       make_regname($register),
		       (($word & 0x80) == 0) ? "W" : "F"
		       );
    } elsif ($type eq 'FB') {
	# If the RP0 flag is set for page 1, then add 0x80
	my $register = $word & 0x7F;
        $register += ($RP << 7);

	my $bitnum = ($word >> 7) & 7;
	my $registername = make_regname($register);

	# If the opcode is BSF or BCF, and the F is STATUS,
	# and the bitnumber is RP0, then update the current RP0 flag
        check_pages($opcode, $registername, $bitnum)
	    if ($opcode =~ /^b[sc]f/i);

	return sprintf("%s %s, %s", 
		       $base, 
		       $registername, 
		       make_bitname ($registername, $bitnum) );
    } elsif ($type eq '8K') {
	return sprintf("%s 0x%02X", $base, $word & 0xFF);
    } elsif ($type eq '11K') {
        my $target = ($word & 0x7FF) + ($CP << 11);
	return sprintf("%s %s", $base, make_label($target));
    }

    # Shouldn't ever reach here...

    return "???";
}

sub make_regname {
    my ($register) = @_;

    # If a register name is in the %registers hash, then return it. Otherwise
    # we just return the hex version of the number.

    return $registers{$register} if ($registers{$register});

    return sprintf("0x%02X", $register);
}

sub make_bitname {
    my ($regname, $bit) = @_;

    # If we can find a mapping for the bit name in the %bits hash, return it.
    # Otherwise just return the bit number.

    return $bits{$regname}->[$bit] if (defined $bits{$regname}->[$bit]);

    return $bit;
}

sub make_label {
    my ($address) = @_;

    # Turn an address into a unique label name. Currently, this does it 
    # by directly using the target address.

    return $labels{$address} if ($labels{$address});

    return sprintf("L%.4X", $address);
}

sub is_dwaddr {
    my ($addr) = @_;

    # Return whether or not the value is in the @DWs array.

    foreach my $i (@DWs) {
	return 1 if ($addr == $i);
    }

    return 0;

}

sub usage {
    print "Usage: $0 [-a] <-b <filename> | -i <filename>>\n".

	"\t-a:            show a column of addresses\n".
	"\t-b <filename>: read from binary file <filename>\n".
	"\t-d:            perform call depth analysis (adds max call depth to comments)\n".
	"\t-D <number>:   With -d, will include stack dumps when >= the given number\n".
	"\t-i <filename>: read from Intel Hex file <filename>\n".
	"\t-g <filename>: graph, and save the gif as <filename>\n".
	"\t-l:            show opcodes in lowercase\n".
	"\t-m <filename>: load register hints from mapfile <filename>\n".
	"\t-r <filename>: load register name hint file <filename>\n".
	"\t-s:            simplify labels as much as possible\n".
	"\t-S <regexes>:  ignore labels in mapfile that match given (colon-separated) regexes\n".
	"\n"
	;
    
    exit(-1);
}

# read_binary: read in a little-endian binary file.
# Drawback: can't tell what areas of memory to NOT disassemble (empty
#           regions).

sub read_binary {
    my ($filename) = @_;
    my ($byte, $codesize);

# Read in the entire file and populate the memory array.
    open (FILE, $filename) || die "Can't open '$filename': $!";
    binmode(FILE);
    
    while ( read(FILE, $byte, 2) == 2) {
	$mask{$codesize} = 1;
	$memory[$codesize++] = unpack("v", $byte);
    }
    close FILE;

    return $codesize;
}

# read_intelhex: read in an Intel Hexfile format, as generated by 
# picl and presumably MPASM. This lets us also tell which areas of
# memory are just holes, and shouldn't be disassembled.

sub read_intelhex {
    my ($filename) = @_;
    my ($line, $codesize, $num, $addr, $type, $data);

    $codesize = 0;

    open (FILE, $filename) || die "Can't open '$filename': $!";
    # Specifically NOT binmode.

    while ($line = <FILE>) {
	chomp $line;
	$line =~ /^:(..)(....)(..)(.*)$/;
	$num = hex($1);
	$addr = hex($2);
	$type = hex($3);
	$data = $4;

	# The Intel hex format is byte-based, but the PIC microcontroller
	# is word-based. We read in all of the data one byte at a time, and
	# make a determination as to whether it is the low or high word of
	# the memory word. (The Intel format is little-endian.)

	if ($type == 0) {
	    for my $i (0..$num-1) {
		my $offset = 0; # number of bytes to shift left
		my $byte = substr($data, $i*2, 2);
		$byte = hex $byte;
		if (($addr + $i) & 0x01) {
		    $offset = 8; # if it's an odd byte, it's a high byte
		}

                $memory[ ($addr + $i) / 2 ] ||= 0;

		$memory[ ($addr + $i) / 2 ] =
		    $memory[ ($addr + $i) / 2 ]  | ($byte << $offset);

		$mask{ int (($addr + $i) / 2) } = 1;
		$codesize = int (($addr + $i) / 2) 
		    if (int(($addr + $i)/2) > $codesize);
	    }
	}
    }
    close FILE;

    return $codesize;
}

sub format_address {
    my ($address) = @_;

    return sprintf("0x%.4X ", $address) if ($opt_a);

    return "";
}

sub make_opcodes_lowercase {
    foreach my $i (keys(%instructions)) {
	my $newkey = lc($i);
	$instructions{$newkey} = $instructions{$i};
	delete $instructions{$i};
    }
   
}

sub load_register_file {
    my ($file) = @_;

    # Read in a file that contains whitespace-delimited varable name / 
    # address information.

    open(FILE, $file) || die "Can't open register file '$file': $!";
    while (<FILE>) {
	chomp;
	my ($var, $addr)  = split(/\s+/);

	if ($addr =~ /^L(.+)$/) {
	    # It's a label, not a var.
	    $addr = hex($1);
	    $labels{$addr} = $var;
	} else {
	    # It's a var.
	    $addr = hex($addr) if ($addr =~ /^0x/i);
	    $registers{$addr} = $var;
	}
    }
    close FILE;
}

sub load_map_file {
    my ($file) = @_;

    # Find anything that looks like an address, and create a dummy register 
    # hint file. Then load that.

    my $tmpfile = "/tmp/out.$$"; # FIXME this is a lousy temp file.
    open(OUT, ">", $tmpfile) || die "Can't open temp file '$tmpfile': $!";

    open(FILE, $file) || die "Can't open map file '$file': $!";
readloop:
    while (<FILE>) {
	chomp;
	my ($var, $addr, $loc, $other) = /^(.{25})(.{11})(.{11})(.+)$/;
	$var ||= '';
	$addr ||= '';
	$loc ||= '';
	$other ||= '';

	$var =~ s/ //g;
	$addr =~ s/ //g;
	$loc =~ s/ //g;
	if ($opt_S) {
	    foreach my $i (split(/:/, $opt_S)) {
		next readloop
		    if ($var =~ /$i/);
	    }
	}

	if ($addr =~ /0x/) {
	    if ($loc eq 'program') {
		print OUT "$var\tL$addr\n"; # note the L
	    } elsif ($loc eq 'data') {
		print OUT "$var\t$addr\n";
	    }
	}
    }
    close FILE;
    close OUT;

    load_register_file($tmpfile);
    unlink $tmpfile;
}

sub check_pages {
    my ($opcode, $registername, $bitnum) = @_;

    if ($opcode =~ /^b[sc]f/i) {
	my $bitname = make_bitname($registername, $bitnum);
	if ( $bitname =~ /^rp0$/i ) {
	    if ($opcode =~ /^bsf/i) {
		$RP |= 1;
	    } else {
		$RP &= ~0x01;
	    }
	} elsif ( $bitname =~ /^rp1$/i ) {
	    if ($opcode =~ /^bsf/i) {
		$RP |= 2;
	    } else {
		$RP &= ~0x02;
	    }
	} elsif ( $registername =~ /^pclath$/i && 
		  ($bitnum == 3 || $bitnum == 4) ) {
	    if ($opcode =~ /^bsf/i) {
		$CP |= (1<<($bitnum-3));
	    } else {
		$CP &= ~ (1<<($bitnum-3));
	    }
	}
    }
}

sub codepage {
    my ($addr) = @_;

    return ( ($addr & 0x1800) >> 11);

}
