#! /usr/bin/env python

"""codeplot.py -- Creating (rather weird) source code plots.

This module takes a Python source code file and creates a two-dimensional 
plot out of it. The plot generated is a symmetrical one, much like a 2D 
matrix where a dot at (i,j) indicates that lines i and j in the input file 
are the same.

Sounds silly? Maybe, yes. After all, this is a one-day fun 'project' done
while reading a book on, you guessed it, patterns. But I guess you'll be
surprised by what you can see in these silly plots.

Peaks result by definition from identical lines (in red). Very often this 
will be the case due to empty lines, therefore these lines are marked 
distinctly (grey). Other interesting things come from lines that are 
similar, but not identical. In the current implementation, "similar" means 
lines that are identical modulo an arbitrary indentation (marked in blue). 
So this is only a slight bias towards Python as a language, but in 
fact you can run codeplot on any kind of ASCII file (that is made of 
individual lines, separated by newlines).

The plots are generated using the new PIDDLE interface (during development 
codeplot created PDF files) in the version 1.0.3. Some words of praise for 
PIDDLE: it's cool! Without it codeplot would never have happened.

What's the future of this thing?! I don't know! I'll do a bit more here and 
there, maybe create a proper EPS file instead of PDF (such that it can be 
included somewhere else more easily). There's likely still something to 
improve to handle indentation by tabs properly as well. It could be faster, 
perhaps, but it's inherently O(n^2), so don't use it on a 200 KB C file 
(yes - they *do* exist, sigh...). Using Numeric might be an idea, not 
sure... One could generate a real image inside the PDF to reduce the file
size... If you think you can do something to improve it let me know. If you 
use it in some interesting way, let me know, too.

Dinu Gherman, gherman@europemail.com
1999-07-28
"""


# Version string of this module.
__version__ = "0.1.1"


import sys, os, string, re
from piddle import *
import piddlePDF


# Constant that should be in the string module, perhaps.
# Characters treated as end-of-line markers when stripping lines.
eolMarkers = "\n\r"

# Regular expressions for source code line classification.
# Tabs not checked...
# Raw strings keep the backslash sequences for the re module instead of
# relying on Python passing unknown string escapes through unchanged.

# A line made of optional whitespace followed by a newline.
mtLine = re.compile(r'\s*\n')

# A string made of whitespace only (no EOL required).  The pattern has to
# be anchored at the end: match() only anchors at the start, and a bare
# '\s*' matches the empty prefix of every string.
mtLineNoEOL = re.compile(r'\s*$')

# Mapping from line pair classification to peak colors.
peakColors = {'mt':grey, 'diff':red, 'similar':blue}


def isEmpty(line, withEOL=1):
    """Check if a code line is empty, with or without EOL.

    line    -- the string to examine.
    withEOL -- if true (the default), the line is matched against
               mtLine, i.e. optional whitespace followed by a newline;
               if false, the line must consist of whitespace only
               (the empty string counts as empty, too).

    Returns a true value if the line is empty, a false one otherwise.
    """
    if withEOL:
        return mtLine.match(line) is not None

    # Do not use an unanchored '\s*' pattern with match() here: it
    # matches the empty prefix of *every* string, so each line would be
    # reported as empty.  Stripping all whitespace and checking that
    # nothing is left is an exact test.
    return not line.strip()


def rstripEOL(line):
    "Return the line without any trailing EOL characters."

    # string.rstrip is no option here, since it would remove ALL
    # trailing whitespace, not just the EOL markers.
    # Walk backwards from the end of the line over every character
    # listed in eolMarkers, then cut the line at that position.
    end = len(line)
    while end > 0 and line[end - 1] in eolMarkers:
        end = end - 1
    return line[:end]


def classifyLines(line1, line2):
    """Classify the relationship between two lines.

    Returns one of the keys of peakColors, or None:

    'mt'      -- both lines are identical and match mtLine (whitespace
                 followed by a newline).
    'diff'    -- both lines are identical and not empty.
    'similar' -- the lines are not identical, but equal once the
                 trailing EOL and the leading whitespace (indentation)
                 are removed.
    None      -- anything else, including the case where one of the
                 lines is completely empty after removing its EOL.
    """

    # First, examine identical lines.
    if line1 == line2:
        if mtLine.match(line1):
            return 'mt'
        return 'diff'

    # Now, search for equal lines, but with different indentation.

    # Strip off trailing EOLs and return, if any of the two is empty.
    l1 = rstripEOL(line1)
    l2 = rstripEOL(line2)
    if not l1 or not l2:
        return None

    # Compare the lines with their indentation removed.  Searching one
    # line inside the other is not enough: it also accepts lines that
    # merely contain the other one somewhere ("x = 1" in "x = 10").
    if l1.lstrip() == l2.lstrip():
        return 'similar'
    return None


def makePlot(lines, inPath, outPath):
    """Generate a plot file for some input file.

    lines   -- list of the lines of the input file.
    inPath  -- path of the input file; only its last component is used,
               drawn as a label above the plot.
    outPath -- name passed to piddlePDF.PDFCanvas for the output file.

    A filled unit square is drawn at (i,j) and (j,i) for every pair of
    lines that classifyLines() assigns to one of the peakColors keys.
    """

    canvas = piddlePDF.PDFCanvas(name=outPath)
    canvas.defaultLineColor = black
    canvas.saveGraphicsState()

    fontSize = 10
    font = Font(face='Helvetica', size=fontSize)

    # Calc. and apply some values to center and scale the plot, if needed.
    # Float arithmetic is forced so that Python 2 integer division can
    # neither shift the plot nor truncate the scale factor to zero.
    n = len(lines)
    from pagesizes import A4
    width = A4[0]
    margin = 40
    if n < width - 2*margin:
        tf = (width - n)/2.0
        sf = 1.0 # No scaling needed.
    else:
        tf = margin
        sf = float(width - 2*margin)/n
    canvas.translate(tf, tf)
    canvas.scale(sf, sf)

    # Draw filename.
    inPath = os.path.basename(inPath)
    canvas.drawString(inPath, 0,n + 2*fontSize, font)

    # Draw outer box frame.
    canvas.drawRect(0,0, n,n, edgeColor=black, edgeWidth=0.0)

    # Draw peaks (looping only over half the matrix).
    # Rectangular peaks; use canvas.drawEllipse here for circular ones.
    draw = canvas.drawRect
    for i in xrange(n):
        lineI = lines[i]
        for j in xrange(i+1):
            # Unclassified pairs (None) have no entry in peakColors.
            # Looking them up with get() avoids raising a KeyError for
            # most pairs and no longer hides KeyErrors raised while
            # drawing.
            col = peakColors.get(classifyLines(lineI, lines[j]))
            if col is None:
                continue

            draw(i,j, i+1,j+1, edgeColor=col, edgeWidth=0.0, fillColor=col)
            # The mirrored peak coincides with the first one on the
            # diagonal, so it is only drawn off the diagonal.
            if i != j:
                draw(j,i, j+1,i+1, edgeColor=col, edgeWidth=0.0, fillColor=col)

    canvas.restoreGraphicsState()
    # NOTE(review): this relies on flush() writing the PDF file; some
    # PIDDLE releases may need canvas.save() instead -- confirm against
    # the installed version.
    canvas.flush()


def readAndPlotFile(inPath=sys.argv[0]):
    """Read a source file and generate a PDF plot for it.

    inPath -- path of the file to plot.  The default is sys.argv[0] as
              it was when this function was defined.

    The plot is generated by makePlot() with inPath + '.pdf' as the
    output name.  Errors from opening or reading the file propagate to
    the caller.
    """

    # Close the file explicitly instead of leaving it to the garbage
    # collector, even if reading fails.
    f = open(inPath)
    try:
        lines = f.readlines()
    finally:
        f.close()

    outPath = inPath + '.pdf'
    makePlot(lines, inPath, outPath)


if __name__ == "__main__":
    arg0 = sys.argv[0]
    # Test the argument count explicitly.  Catching IndexError around
    # the whole loop would also catch IndexErrors raised while plotting
    # and misreport them as "no arguments given".
    if len(sys.argv) > 1:
        # Loop over input file names if provided.
        for arg in sys.argv[1:]:
            readAndPlotFile(arg)
    else:
        # Print usage message, then plot this script itself as a test.
        # (Single-argument print(...) behaves the same as the print
        # statement under Python 2.)
        print("Module: %s - Generating PDF code plots from source files." % arg0)
        print("Usage:  python %s [<file1> [<file2> ...]]" % arg0)
        print("Testing now with %s..."  % arg0)
        readAndPlotFile(arg0)
        print("Done.")

