#!/usr/bin/python
"""
Collect library dependency information for an executable or shared
library and optionally generate instructions for building a graph. 
Because ELF information is architecture-dependent, collect and parse
information from readelf (from binutils).  The runtime linker is
used to generate paths, so the object needs to be the real target,
not a wrapper.

Usage:
$ lsbdepgraph.py [-e] [-g] [-a ABI] object

Dumps a list of dependency information; with -g writes graphing
information into a file object.dot

-a ABI selects the ABI (i.e. LSB version) to check against

Generating a graph from the dot file requires the graphviz toolset.
At last check, pkgs available for Debian, SuSE, Conectiva, PLD.
Else check http://www.graphviz.org.

$ dot -Tpng command.dot -o command.png

or for a more reduced graph,

$ tred command.dot | dot -Tpng -o command.png
"""

# Copyright (C) 2003-2005 The Free Standards Group
# Author: Mats Wichmann, Intel Corporation
#
# This is $Revision: 1.7 $
#
# $Log: lsbdepgraph.py,v $
# Revision 1.7  2005/03/12 22:20:31  mats
# Change default to test output; -g to produce dotty graph.  More cleanup
# and improve commenting.  Fix problems with illegal characters in graph
# file by quoting.  Filter out the dynamic linker name.
#
# Revision 1.6  2005/03/12 14:10:41  mats
# Add option handling (so far only LSB version switching works);
# get a little smarter about dynamic linker if argument is a library;
# assorted cleanup, mostly of comments
#
# Revision 1.5  2005/02/28 19:16:09  mats
# First cut at changes for bug 722 - it does work on libs now
#
# Revision 1.4  2004/06/10 14:35:38  mats
# Switch to LSB 2.0 mode. Update the request list.
#
# Revision 1.3  2003/09/19 21:46:09  mats
# Update TODO list, add pointer to where to find graphviz
#
# Revision 1.2  2003/09/12 21:49:50  mats
# Trim debugging output by not doing a top-level row that's already been seen.
# Turn off debugging.  Instead, dump the same info as comments to the .dot file
#

# Request list updated 12 March 2005
# + add a switch to display/not display lsb libs individually
#   they started expanded, now are collapsed to make a cleaner graph
#   still don't want graph lines to individual lsb libs
#   (option exists but does nothing)
# + add command line option to give a title to the graph
# + for text-mode, optionally backtrace to package providing the lib
# + external-expansion possibility: as libraries are identified, some
#   info can be added that helps categorize them, e.g. maybe flag
#   "Gnome libraries" or "libraries in the libbat" or such.

# Method: do the equivalent of 'ldd' to gather the list of libraries.
# Then for the object and each library actually walk down the dependency chain.
# Each library is a class instance which holds the libraries it depends on.

import os
import subprocess
import sys
from optparse import OptionParser


class Lib:
    """record for one library (or for the object being examined)

    Attributes:
      name      -- the name as handed in by the caller
      fullpath  -- the path that goes with that name
      shortname -- the part of name ahead of the first '.so', or None
      deps      -- list of Lib instances this one depends on
      seen      -- marker set by the dump routines once this entry
                   has been written out"""
    def __init__(self, name, fullpath):
        self.name = name
        self.fullpath = fullpath
        self.deps = []
        self.seen = False
        # a name with no '.so' in it (or one that begins with '.so')
        # gets no short name
        self.shortname = None
        cut = name.find('.so')
        if cut > 0:
            self.shortname = name[:cut]

    def isLSB(self):
        """true if the short name is listed in the module-level abilibs"""
        return self.shortname in abilibs


def checkFileType(binary):
    """return type of file, choice is (None, "EXEC", "DYN")

    binary is the path of the file to look at.  The ELF header is read
    with 'readelf --header'.  None is returned when readelf cannot be
    started, when it exits with an error (as it does on non-ELF
    objects), or when the header's Type is neither DYN nor EXEC."""
    # Hand the path over as its own argv element rather than pasting it
    # into a shell command line: names with spaces or shell
    # metacharacters are then taken literally.
    try:
        proc = subprocess.Popen(["readelf", "--header", binary],
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                                universal_newlines=True)
    except OSError:
        # readelf itself could not be run
        return None
    data = proc.communicate()[0]        # error output is thrown away
    if proc.returncode != 0:
        return None
    for line in data.splitlines():
        # looking for these lines:
        # Type:                              DYN (Shared object file)
        # Type:                              EXEC (Executable file)
        fields = line.strip().split(':', 1)
        if len(fields) == 2 and fields[0] == 'Type':
            value = fields[1].strip()
            for kind in ("DYN", "EXEC"):
                if value.startswith(kind):
                    return kind
            return None
    return None


def checkObject(object, liblist):
    """gather information on library dependencies of an object
    object is a Lib instance. liblist is a previously generated
    list of all the libraries in use. dependencies are looked up
    on it and then attached to the instance's deps list

    The NEEDED entries are read from 'readelf --dynamic'.  A needed
    name that has no match in liblist is ignored.  Nothing is returned;
    object.deps is extended in place."""
    # The path travels as a separate argv element (no shell involved), so
    # unusual characters in it cannot change the command that is run.
    try:
        proc = subprocess.Popen(["readelf", "--dynamic", object.fullpath],
                                stdout=subprocess.PIPE,
                                universal_newlines=True)
    except OSError:
        sys.stderr.write("Error: unable to run readelf on %s\n"
                         % object.fullpath)
        return
    data = proc.communicate()[0]
    for line in data.splitlines():
        # looking for these lines:
        #  0x00000001 (NEEDED)                     Shared library: [libc.so.6]
        fields = line.split()
        if len(fields) < 5 or fields[1] != '(NEEDED)':
            continue
        found = fields[4][1:-1]         # strip the surrounding brackets
        for lib in liblist:
            if lib.name == found:
                object.deps.append(lib)
                break

# candidate locations of the runtime linker, tried in this order
dynlinkers = ["/lib64/ld-linux-x86-64.so.2", "/lib/ld-linux.so.2",
   "/lib/ld-linux-ia64.so.2", "/lib64/ld64.so.1",
   "/lib/ld.so.1", "/lib/ld64.so.1"]

def scanForLibs(object, type):
    """return the list of libraries used by a command-line argument

    object is the path of the file to examine; type is "EXEC" for an
    executable, anything else is handled as a shared library.
    Returns a list of Lib instances built from the runtime linker's
    trace output.  The list is empty when none of the dynlinkers exists
    on this system or when the trace command cannot be started."""
    for dynlinker in dynlinkers:
        if os.path.exists(dynlinker):
            break
    else:
        # an empty list (rather than None) keeps callers that iterate
        # over the result working
        sys.stderr.write("Error: no known dynamic linker found\n")
        return []
    # The easiest way is to ask the dynamic linker to do the work for
    # us using LD_TRACE_LOADED_OBJECTS.
    env = dict(os.environ)
    env["LD_TRACE_LOADED_OBJECTS"] = "1"
    if type == "EXEC":
        argv = [object]
    else:
        # For a shlib, since the linker isn't bound in, we need to
        # insert it on the line
        argv = [dynlinker, object]
    # argv list, no shell: the path is passed through untouched
    try:
        proc = subprocess.Popen(argv, stdout=subprocess.PIPE, env=env,
                                universal_newlines=True)
    except OSError:
        sys.stderr.write("Error: unable to run %s\n" % argv[0])
        return []
    data = proc.communicate()[0]
    libs = []
    for line in data.splitlines():
        # looking for these lines:
        #         libc.so.6 => /lib/tls/libc.so.6 (0x40041000)
        fields = line.split()
        if len(fields) < 4:
            continue                    # skip dummy entries
        if fields[2] == dynlinker:
            continue                    # leave out the linker itself
        # NOTE(review): a four-field line that is not "name => path (addr)"
        # would be recorded with whatever its third field is - confirm
        # whether the linker can emit such lines
        libs.append(Lib(fields[0], fields[2]))
    return libs


def splitLibList(libs):
    """partition libs by their isLSB() answer

    Returns the pair (lsb list, non-lsb list), each in input order.
    Side effect: every lsb entry has its shortname overwritten
    with 'lsb'"""
    inside = []
    outside = []
    for entry in libs:
        if not entry.isLSB():
            outside.append(entry)
            continue
        # LSB member: collapse its short name to the common cookie
        entry.shortname = 'lsb'
        inside.append(entry)
    return inside, outside


def getPath(binary):
    """try to find a binary using PATH. returns the first match

    binary is a bare file name.  Each PATH directory is tried in
    order and the first one holding a regular file of that name wins;
    the joined path is returned.  Returns None if nothing matches or
    if PATH is unset or empty."""
    search = os.getenv("PATH")
    if not search:
        return None
    for directory in search.split(os.pathsep):
        # an empty PATH element stands for the current directory
        candidate = os.path.join(directory or os.curdir, binary)
        # isfile, not exists: a directory of the same name is no match
        if os.path.isfile(candidate):
            return candidate
    return None


def dumpLibs(top, dotfile):
    """print out libraries and dependencies as comments

    top is a Lib instance, dotfile an open file to write to.  One
    '#' line is written for top, naming it and its direct
    dependencies, then the same is done for each dependency in turn.
    Entries without dependencies, and entries whose seen flag is
    already set, produce no output; the flag is set on every entry
    that is written."""
    if not top.deps or top.seen:
        return
    top.seen = True
    # Written with write() so the code is valid on Python 2 and 3 alike.
    # Each name gets a leading blank, which after the "-> " yields the
    # same two-space gap the print statement's soft-space produced.
    names = "".join([" %s" % (dep.name,) for dep in top.deps])
    dotfile.write("# %s (%s) -> %s\n" % (top.name, top.fullpath, names))
    for entry in top.deps:
        dumpLibs(entry, dotfile)


def dumpDot(binary, lsblibs, nonlsblibs):
    """produce a .dot format graphing file from the gathered data
    this file is to later be processed by the dot command. assorted
    reductions are made to simplify the graph

    binary is the Lib instance for the object under examination; the
    output goes to '<binary.name>.dot' in the current directory.
    nonlsblibs is the list of Lib instances to show as individual
    nodes.  lsblibs is accepted but not used: the LSB libraries are
    drawn as the single node "lsb".  After the graph, the dependency
    dump from dumpLibs is appended as comment lines."""
    # this is messy stuff: some of the hardcoded info like ratio/size
    # was developed through trial-and-error, there should be better
    # ways to supply and control this
    lines = [
        'digraph "%s" {' % binary.name,
        '  ratio="1.0"',
        '  size="14.0,14.0"',      # a WILD guess...
        '  "%s"\t[shape=Mdiamond]' % binary.name,
        '  subgraph clusterLSB {',
        '    style=filled\n    shape=box\n    color=palevioletred',
        '    label="LSB Libraries"',
        '    node [style=filled, color=white]',
        # LSB libraries are reduced to a single node (also by splitLibList)
        '    "lsb"',
        '  }',
        '  subgraph clusterNonLSB {',
        '    style=filled\n    shape=box\n    color=aquamarine',
        '    label="non-LSB Libraries"',
        '    node [style=filled, color=white]',
    ]
    # loop through lsblibs skipped intentionally.  This might miss some
    # lsb library dependencies on other libs, not sure
    for lib in nonlsblibs:
        lines.append('    "%s"' % lib.shortname)
    lines.append('  }')
    # abbreviate all the binary.name -> lsblib links to just a single one
    lines.append('  "%s" -> "lsb"' % binary.name)

    netlist = []
    for lib in nonlsblibs:
        netlist.append('  "%s" -> "%s"' % (binary.name, lib.shortname))
        for dep in lib.deps:
            netlist.append('  "%s" -> "%s"' % (lib.shortname, dep.shortname))
    # drop duplicate edges: after sorting, equal entries are adjacent,
    # so keep an entry only when it differs from the last one kept
    # (this way the first edge is kept as well)
    netlist.sort()
    newlist = []
    for net in netlist:
        if not newlist or net != newlist[-1]:
            newlist.append(net)
    lines.append('# DEBUG - %d elements reduced to %d'
                 % (len(netlist), len(newlist)))
    lines.extend(newlist)

    lines.append('}')
    lines.append('#==== library dump')

    dotfile = open(binary.name + ".dot", 'w')
    try:
        dotfile.write("\n".join(lines) + "\n")
        dumpLibs(binary, dotfile)
    finally:
        # close the file even if writing fails part way
        dotfile.close()


def dump(top):
    """print out libraries and dependencies

    top is a Lib instance.  One line goes to standard output naming
    top and its direct dependencies, then each dependency is dumped
    the same way.  Entries without dependencies, and entries whose
    seen flag is already set, produce no output; the flag is set on
    every entry that is printed."""
    if not top.deps or top.seen:
        return
    top.seen = True
    # Written with write() so the code is valid on Python 2 and 3 alike.
    # Each name gets a leading blank, which after the "-> " yields the
    # same two-space gap the print statement's soft-space produced.
    names = "".join([" %s" % (dep.name,) for dep in top.deps])
    sys.stdout.write("%s (%s) -> %s\n" % (top.name, top.fullpath, names))
    for entry in top.deps:
        dump(entry)


### main
# Command-line driver: parse the options, pick the library list for the
# requested ABI, then examine the single object named on the command line.
usage="usage: %prog [options] object"
parser = OptionParser(usage)
parser.add_option("-e", "--expand",
                  action="store_true", dest="expand", default=False,
                  help="expand LSB library list in output graph")
parser.add_option("-g", "--graph",
                  action="store_true", dest="graph", default=False,
                  help="generate a dotty graph")
# restricting -a to the known names makes optparse reject anything else
# up front, so abilibs below is always defined
parser.add_option("-a", "--abi", default="LSB3",
                  type="choice", choices=["LSB1", "LSB2", "LSB3"],
                  help="ABI version: LSB1, LSB2, LSB3 [default: %default]")
(opts, args) = parser.parse_args()

# these are the defined library packages
LSBCORE = ["libc", "libpthread", "libm", "libdl", "libutil",
    "libncurses", "libcrypt", "libz", "libpam", "libgcc_s"]
LSBGRAPHICS = ["libX11", "libXext", "libXt", "libSM", "libICE", "libGL"]
LSBCXX = ["libstdc++"]
LSBMAGIC = ["lsb" ]     # a special cookie

if opts.expand:
    sys.stdout.write("expansion not implemented yet, ignoring\n")

# abilibs is read by Lib.isLSB(); LSB1 has no C++ library
if opts.abi == "LSB1":
    abilibs = LSBCORE + LSBGRAPHICS + LSBMAGIC
else:
    abilibs = LSBCORE + LSBGRAPHICS + LSBCXX + LSBMAGIC

if len(args) != 1:
    parser.error("incorrect number of arguments")

for arg in args:
    if os.sep in arg:   # has a pathname separator, just use this path
        path = arg
        name = os.path.basename(arg)
    else:               # no separator, look up using PATH
        name = arg
        path = getPath(arg)
        if not path:
            sys.stdout.write("Error: %s not found, skipping\n" % arg)
            continue
    ftype = checkFileType(path)
    if not ftype:       # if it's not ELF, no point in continuing
        sys.stderr.write("Error: %s is not an ELF executable or shared "
                         "library, skipping\n" % path)
        continue
    target = Lib(name, path)
    # tolerate a scanForLibs that hands back None instead of a list
    libs = scanForLibs(path, ftype) or []
    checkObject(target, libs)
    for lib in libs:
        checkObject(lib, libs)
    if opts.graph:
        lsblibs, nonlsblibs = splitLibList(libs)
        dumpDot(target, lsblibs, nonlsblibs)
    else:
        dump(target)
