diff --git a/src/qpcr/MinerMethod.py b/src/qpcr/MinerMethod.py index 194c219..6fb3c9d 100644 --- a/src/qpcr/MinerMethod.py +++ b/src/qpcr/MinerMethod.py @@ -1,7 +1,21 @@ #!/usr/bin/env python ''' -Created on Sep 1, 2010 +Implementation of the Miner Method for qPCR crossing-point determination. + +Provides the four-parameter logistic (4PL) model (``qpcrFit``), an +exponential-phase nonlinear regression model (``nlmFit``), and three +crossing-point estimation methods derived from the fitted 4PL parameters: + +- FDM (First Derivative Maximum) +- SDM (Second Derivative Maximum) +- SPE (Signal-to-noise / Percentage of Efficiency) + +Also contains an example fit executed at import time using a hard-coded +sample fluorescence curve (``myData``). + +Reference: Zhao & Fernald (2005). "Comprehensive algorithm for +quantitative real-time polymerase chain reaction." J Comput Biol. @author: lgoff ''' @@ -24,6 +38,15 @@ #Misc ######### def nthRoot(num,n): + """Compute the nth root of a number. + + Args: + num: The base value (numeric). + n: The root degree (numeric, must not be zero). + + Returns: + ``num ** (1.0 / n)`` as a float. + """ return num ** (1.0/n) ############# @@ -33,45 +56,138 @@ def nthRoot(num,n): #errfunc = lambda p,x,y: y-fitfunc(p,x) #Distance to the target function (residuals) def fit(p,x): - """ - Depricated in favor of qpcrFit to use optimize.curve_fit() - f(x) Logistic model for qPCR Data - fitfunc = lambda p,x: p[3]+(p[0]/(1+((x/p[2])**p[1]))) # From actual paper (Zhao et al) where p = [a,b,x_0,y_0] + """Evaluate the four-parameter logistic (4PL) model using a parameter vector. + + Deprecated in favor of ``qpcrFit``, which is compatible with + ``scipy.optimize.curve_fit``. + + The model is: + f(x) = p[3] + p[0] / (1 + (x / p[2])^p[1]) + + where ``p = [a, b, x0, y0]`` following the notation in Zhao et al. 
+ + Args: + p: Sequence of four model parameters ``[a, b, x0, y0]``: + a – amplitude (difference between upper and lower asymptotes), + b – slope/steepness, + x0 – inflection point (cycle at midpoint), + y0 – baseline fluorescence (lower asymptote). + x: Cycle number (scalar or array). + + Returns: + Predicted fluorescence value(s) at cycle ``x``. """ return (p[3]+(p[0]/(1+((x/p[2])**p[1])))) def qpcrFit(x,a,b,x0,y0): - """Same as fit but designed to run with optimize.curve_fit""" + """Evaluate the four-parameter logistic (4PL) model for qPCR fluorescence data. + + Implements the model from Zhao et al.: + f(x) = y0 + a / (1 + (x / x0)^b) + + Designed for use with ``scipy.optimize.curve_fit``. + + Args: + x: Cycle number (scalar or array). + a: Amplitude parameter (difference between upper and lower + asymptotes). + b: Slope/steepness parameter. + x0: Inflection point (cycle at the midpoint of the curve). + y0: Baseline fluorescence (lower asymptote). + + Returns: + Predicted fluorescence value(s) at cycle ``x``. + """ return (y0+(a/(1+((x/x0)**b)))) def qpcrFitResiduals(x,y,a,b,x0,y0): - """ - Residuals: - errfunc = lambda p,x,y: y-fitfunc(p,x) #Distance to the target function (residuals) + """Compute residuals between observed fluorescence and the 4PL model. + + Calculates ``y - qpcrFit(x, a, b, x0, y0)``. + + Args: + x: Cycle number(s) (scalar or array). + y: Observed fluorescence value(s). + a: Amplitude parameter. + b: Slope/steepness parameter. + x0: Inflection point (cycle at midpoint). + y0: Baseline fluorescence (lower asymptote). + + Returns: + Residual value(s) ``y - predicted``. """ return y-qpcrFit(x,a,b,x0,y0) def nlmFit(x,a,b,y0): - """ - Non-linear regression function to optimize for windows in exponential phase - here p = [a,b,y0] + """Evaluate the exponential nonlinear regression model for the exponential phase. 
+ + Models the exponential amplification phase as: + f(x) = y0 + a * (b ^ x) + + Used for iterative nonlinear regression (iNLR) on windows within the + exponential phase. Parameters are ``[a, b, y0]``. + + Args: + x: Cycle number (scalar or array). + a: Amplitude scaling factor. + b: Per-cycle amplification factor (related to efficiency: b ~ E). + y0: Baseline offset. + + Returns: + Predicted fluorescence value(s) at cycle ``x``. """ return y0+(a*(b**x)) def nlmFitResiduals(x,y,a,b,y0): - """ - Residuals: - errfunc = lambda p,x,y: y-nlmFit(x,a,b,y0) #Distance to the target function (residuals) + """Compute residuals between observed fluorescence and the exponential NLM model. + + Calculates ``y - nlmFit(x, a, b, y0)``. + + Args: + x: Cycle number(s) (scalar or array). + y: Observed fluorescence value(s). + a: Amplitude scaling factor. + b: Per-cycle amplification factor. + y0: Baseline offset. + + Returns: + Residual value(s) ``y - predicted``. """ return y-nlmFit(x,a,b,y0) def CP_FDM(p): + """Compute the crossing-point using the First Derivative Maximum (FDM) method. + + Args: + p: Sequence of four fitted 4PL parameters ``[a, b, x0, y0]``. + + Returns: + The FDM crossing-point cycle number as a float. + """ return (p[2]*nthRoot(((p[1]-1)/(p[1]+1)),p[1])) def CP_SDM(p): + """Compute the crossing-point using the Second Derivative Maximum (SDM) method. + + Args: + p: Sequence of four fitted 4PL parameters ``[a, b, x0, y0]``. + + Returns: + The SDM crossing-point cycle number as a float. + """ return p[2]*nthRoot((np.sqrt((3*p[1]**2)*(p[1]**2-1))-(2*(1-p[1]**2)))/((p[1]**2)+(3*p[1])+2),p[1]) def CP_SPE(p,rNoise): + """Compute the crossing-point using the Signal-to-Noise (SPE) method. + + Args: + p: Sequence of four fitted 4PL parameters ``[a, b, x0, y0]``. + rNoise: Baseline noise estimate (standard error of the ``y0`` + parameter, i.e., ``RNoise``). + + Returns: + The SPE crossing-point cycle number as a float. 
+ """ return (p[2]*nthRoot(((p[0]-rNoise)/rNoise),p[1])) diff --git a/src/qpcr/abi.py b/src/qpcr/abi.py index 99e7499..8af1a5f 100644 --- a/src/qpcr/abi.py +++ b/src/qpcr/abi.py @@ -1,6 +1,12 @@ #!/usr/bin/env python ''' -Created on Feb 22, 2010 +Utilities for parsing and analyzing ABI qPCR instrument output. + +Provides functions for parsing raw ABI results and cycle data files, +computing PCR amplification efficiencies via a sliding-window linear +regression on log-transformed fluorescence values, performing the +delta-delta Ct (ddCt) relative-quantification calculation, and +summarizing/reporting the results. Requirements: - numpy @@ -46,8 +52,21 @@ ########################## def parseData(fname): - """Raw input for this file is a matrix of well x (Well,SampleName,DetectorName,Task,Ct,Threshold). You must also delete the intermediate headers and summary rows from raw output of ABI. - Be sure to remove the header section (except one header row). + """Parse a simplified ABI results text file into a list of well dictionaries. + + Raw input is a tab-delimited matrix with columns: + Well, SampleName, DetectorName, Task, Ct, Threshold. + Intermediate headers and summary rows must be removed from the raw ABI + output before calling this function; only one header row should remain. + Wells with an ``Undetermined`` Ct value are silently skipped. + + Args: + fname: Path to the tab-delimited results text file. + + Returns: + A list of dicts, one per well, with keys ``well`` (int), + ``sample``, ``detector``, ``task``, ``Ct`` (float), and + ``threshold`` (float). """ data = [] handle = open(fname,'r') @@ -65,6 +84,18 @@ def parseData(fname): return data def getDetAndSamp(data): + """Return ordered lists of unique detector and sample names found in the data. + + Preserves first-seen order for both detectors and samples. + + Args: + data: List of well dicts as returned by ``parseData``, each containing + ``detector`` and ``sample`` keys. 
+ + Returns: + A tuple ``(detectors, samples)`` where each element is a list of + unique string names in the order they were first encountered. + """ detectors = [] samples = [] for well in data: @@ -75,13 +106,33 @@ def getDetAndSamp(data): return detectors,samples def wellIndex(data): + """Build a list of well numbers in the same order as the data list. + + Args: + data: List of well dicts, each containing a ``well`` key. + + Returns: + A list of integer well numbers corresponding positionally to each + entry in ``data``. + """ index = [] for i in range(len(data)): index.append(data[i]['well']) return index def parseCycleData(fname): - """Raw input is tab-delimited text file with matrix of WellsxCycle values. Header row is included. + """Parse a tab-delimited cycle fluorescence file into a list of well dicts. + + Raw input is a tab-delimited file with a header row. Columns are: + Well, Sample, Detector, followed by one column per cycle number. + + Args: + fname: Path to the tab-delimited cycle data text file. + + Returns: + A list of dicts, one per well, with keys ``well`` (int), + ``sample`` (str), ``detector`` (str), and ``values`` (numpy array + of float fluorescence readings, one per cycle). """ cycleData = [] handle = open(fname,'r') @@ -105,6 +156,17 @@ def parseCycleData(fname): #Get User Input ###################### def getEndoControl(detectors): + """Interactively prompt the user to select an endogenous control detector. + + Prints a numbered list of detector names and reads an integer choice from + standard input. + + Args: + detectors: List of detector name strings to present to the user. + + Returns: + The detector name string chosen by the user. 
+ """ myString = "Please choose an endogenous control:\n" for i in range(0,len(detectors)): myString = myString+"\t(%d):\t%s\n" % (i,detectors[i]) @@ -113,6 +175,17 @@ def getEndoControl(detectors): return detectors[choice] def getReference(samples): + """Interactively prompt the user to select a reference sample. + + Prints a numbered list of sample names and reads an integer choice from + standard input. + + Args: + samples: List of sample name strings to present to the user. + + Returns: + The sample name string chosen by the user. + """ myString = "Please choose a reference sample:\n" for i in range(0,len(samples)): myString = myString + "\t(%d):\t%s\n" % (i,samples[i]) @@ -125,6 +198,19 @@ def getReference(samples): ##################################### def aggregateReplicateCts(data): + """Aggregate replicate Ct values per sample/detector pair using the median. + + Groups raw per-well Ct values by (sample, detector) and computes the + median Ct for each combination. + + Args: + data: List of well dicts, each containing ``sample``, ``detector``, + and ``Ct`` keys. + + Returns: + A nested dict ``{sample: {detector: median_Ct}}`` where each value + is the median Ct (float) computed from all replicate wells. + """ #TODO: make this aggregate either Ct values or N0 values? tmp = {} for d in data: @@ -146,8 +232,28 @@ def aggregateReplicateCts(data): ##################################### def calculateEfficiencies(cycleData): - """Takes a list of dictionaries of cycle information by well and returns those same dictionaries with - additional keys for efficiency and concentration (N0) values.""" + """Compute PCR amplification efficiency and initial concentration (N0) for each well. + + For each well, log10-transforms the fluorescence values, then slides a + window of size ``windowSize`` across all cycles and picks the window with + the highest Pearson correlation between log-fluorescence and cycle number + (i.e., the most linear exponential-phase segment). 
A linear regression on + that best window gives the slope (from which efficiency = 10^slope) and + intercept (from which N0 = 10^intercept). + + Adds the following keys to each well dict in-place: + ``logVals``, ``bestIdx``, ``bestCorr``, ``bestSlice``, + ``bestCycles``, ``bestSlope``, ``bestIntercept``, + ``efficiency``, ``N0``. + + Args: + cycleData: List of well dicts as returned by ``parseCycleData``, + each containing at minimum a ``values`` numpy array. + + Returns: + The same list of well dicts with the additional efficiency and N0 + keys populated. + """ res = [] for well in cycleData: well['logVals'] = getLogVals(well['values']) @@ -172,6 +278,20 @@ def calculateEfficiencies(cycleData): return res def summarizeEfficiencies(cycleData): + """Compute mean and standard deviation of PCR efficiency for each detector. + + Groups per-well efficiency values by detector name and computes summary + statistics. + + Args: + cycleData: List of well dicts, each containing ``detector`` and + ``efficiency`` keys (as produced by ``calculateEfficiencies``). + + Returns: + A dict ``{detector: {'meanEff': float, 'sdevEff': float}}`` giving + the mean and standard deviation of efficiency across all wells for + each detector. + """ tmp = {} #Aggregate efficiencies by detector for i in cycleData: @@ -184,8 +304,24 @@ def summarizeEfficiencies(cycleData): return eff def mergeDataAndCycleData(data,cycleData,idx): - """Takes an index of data (by well) and the cycleData to add the efficiency and N0 from cycleData to the - data dictionaries""" + """Copy efficiency and N0 values from cycleData into the matching well dicts in data. + + Uses the provided well-number index to look up each cycle-data well in + the data list and transfers the ``N0`` and ``efficiency`` values. Wells + present in cycleData but absent from data (e.g., wells skipped due to + undetermined Ct) are silently ignored. + + Args: + data: List of well dicts as returned by ``parseData``. 
+ cycleData: List of well dicts as returned by ``calculateEfficiencies``, + each containing ``well``, ``N0``, and ``efficiency`` keys. + idx: List of integer well numbers parallel to ``data``, as returned + by ``wellIndex``. + + Returns: + The ``data`` list with ``N0`` and ``efficiency`` keys added to each + matched well dict. + """ for c in cycleData: try: dataloc = idx.index(c['well']) @@ -198,12 +334,45 @@ def mergeDataAndCycleData(data,cycleData,idx): #TODO: Make summarizer for N0 elements by sample and detector def getLogVals(myArray): + """Return the base-10 logarithm of each element in a numpy array. + + Args: + myArray: A numpy array of positive numeric values. + + Returns: + A numpy array of the same shape containing log10 of each input value. + """ return np.log10(myArray) ############################### #ddCt math ############################### def ddCt(data,medianCts,endoControl,reference): + """Compute delta-Ct and delta-delta-Ct values for each well. + + For each well, dCt is calculated as: + dCt = Ct - median_Ct(sample, endoControl) + + ddCt is then calculated as: + ddCt = dCt - median_dCt(reference, detector) + + Wells where the endogenous control Ct is unavailable receive ``"N/A"`` + for dCt, and wells where the reference dCt is unavailable receive + ``"N/A"`` for ddCt. + + Args: + data: List of well dicts, each containing ``sample``, ``detector``, + and ``Ct`` keys. + medianCts: Nested dict ``{sample: {detector: median_Ct}}`` as returned + by ``aggregateReplicateCts``. + endoControl: Name of the endogenous control detector to use for + normalization. + reference: Name of the reference sample to use for ddCt calculation. + + Returns: + The ``data`` list with ``dCt`` and ``ddCt`` keys added to each well + dict (values are floats or ``"N/A"``). 
+ """ tmp = {} #Calculate dCts for i in range(len(data)): @@ -230,6 +399,24 @@ def ddCt(data,medianCts,endoControl,reference): return data def RQ(data,effs): + """Calculate relative quantification (RQ) values for each well. + + RQ is computed as: + RQ = meanEfficiency ^ (-ddCt) + + Wells with a ``"N/A"`` ddCt or a missing efficiency entry receive + ``"N/A"`` for RQ. + + Args: + data: List of well dicts containing ``detector`` and ``ddCt`` keys, + as returned by ``ddCt``. + effs: Dict ``{detector: {'meanEff': float, ...}}`` as returned by + ``summarizeEfficiencies``. + + Returns: + The ``data`` list with an ``RQ`` key added to each well dict + (float or ``"N/A"``). + """ res = [] for d in data: try: @@ -247,7 +434,14 @@ def RQ(data,effs): ############################### def mean(vals): - """Computes the mean of a list of numbers""" + """Compute the arithmetic mean of a list of numbers. + + Args: + vals: An iterable of numeric values. + + Returns: + The arithmetic mean as a float. + """ n = 0 s = 0.0 for i in vals: @@ -256,7 +450,17 @@ def mean(vals): return s / float(n) def median(vals): - """Computes the median of a list of numbers""" + """Compute the median of a list of numbers. + + Sorts the list in-place before computing. + + Args: + vals: A list of numeric values. + + Returns: + The median value as a float. For even-length lists, returns the + average of the two middle values. + """ lenvals = len(vals) vals.sort() @@ -266,17 +470,46 @@ def median(vals): return vals[lenvals // 2] def variance(vals): - """Variance""" + """Compute the sample variance of a list of numbers. + + Uses Bessel's correction (divides by N-1). + + Args: + vals: A list of numeric values with at least two elements. + + Returns: + The sample variance as a float. + """ u = mean(vals) return sum((x - u)**2 for x in vals) / float(len(vals)-1) def sdev(vals): - """Standard deviation""" + """Compute the sample standard deviation of a list of numbers. 
+ + Returns 0.0 for lists with one or fewer elements. + + Args: + vals: A list of numeric values. + + Returns: + The sample standard deviation as a float. + """ if len(vals) <=1: return 0.0 return math.sqrt(variance(vals)) def covariance(lst1, lst2): - """Covariance""" + """Compute the sample covariance between two equal-length lists. + + Uses Bessel's correction (divides by N-1). + + Args: + lst1: First list of numeric values. + lst2: Second list of numeric values; must be the same length as + ``lst1``. + + Returns: + The sample covariance as a float. + """ m1 = mean(lst1) m2 = mean(lst2) tot = 0.0 @@ -285,7 +518,21 @@ def covariance(lst1, lst2): return tot / (len(lst1)-1) def corr(lst1, lst2): - """Pearson's Correlation""" + """Compute the Pearson correlation coefficient between two lists. + + Returns a very large number (1e1000) when the denominator is zero + (i.e., one or both lists have zero variance), which is used as a + sentinel for a perfect linear relationship in the sliding-window search. + + Args: + lst1: First list of numeric values. + lst2: Second list of numeric values; must be the same length as + ``lst1``. + + Returns: + The Pearson correlation coefficient as a float, or 1e1000 when the + standard deviation of either list is zero. + """ num = covariance(lst1, lst2) denom = float(sdev(lst1) * sdev(lst2)) if denom != 0: @@ -294,13 +541,38 @@ def corr(lst1, lst2): return 1e1000 def slope(xarray,yarray): - """Uses numpy, in fact assumes that the list arguments are numpy arrays.""" + """Compute the ordinary least-squares regression slope. + + Uses the standard closed-form formula. Requires numpy arrays because + element-wise multiplication (``xarray * yarray``) and vectorized + ``sum`` are used. + + Args: + xarray: Numpy array of independent variable values. + yarray: Numpy array of dependent variable values; must be the same + length as ``xarray``. + + Returns: + The regression slope as a float. 
+ """ n = float(len(xarray)) m = (n*sum(xarray*yarray)-sum(xarray)*sum(yarray))/(n*sum(xarray**2)-(sum(xarray))**2) return m def intercept(xarray,yarray): - """Uses numpy, in fact assumes that the list arguments are numpy arrays.""" + """Compute the ordinary least-squares regression intercept. + + Uses the standard closed-form formula given the slope. Requires numpy + arrays because vectorized ``sum`` is used. + + Args: + xarray: Numpy array of independent variable values. + yarray: Numpy array of dependent variable values; must be the same + length as ``xarray``. + + Returns: + The regression intercept (y-axis) as a float. + """ m = slope(xarray,yarray) n = float(len(xarray)) b = (sum(yarray)-m*(sum(xarray)))/n @@ -311,9 +583,35 @@ def intercept(xarray,yarray): ############################### def flagBadDetectors(): + """Flag detectors with poor amplification characteristics. + + Not yet implemented. + """ pass def aggregateResults(data): + """Aggregate per-well RQ, N0, and dCt values into per-(sample, detector) summaries. + + Computes mean, median, and standard deviation of RQ, dCt, and N0 + for every (sample, detector) combination across all replicate wells. + Wells with ``"N/A"`` RQ are excluded from RQ and dCt summaries but N0 + is always summarized (N0 values are assumed to be present). + + Args: + data: List of well dicts containing ``sample``, ``detector``, + ``RQ``, ``N0``, and ``dCt`` keys, as returned by ``RQ``. + + Returns: + A nested dict ``{sample: {detector: stats_dict}}`` where + ``stats_dict`` contains the keys: ``medianRQ``, ``meanRQ``, + ``sdevRQ``, ``mediandCt``, ``meandCt``, ``sdevdCt``, + ``medianN0``, ``meanN0``, ``sdevN0``. Unavailable values are + represented as ``"N/A"``. + + Raises: + KeyError: If ``RQ`` values have not yet been computed on the data + (i.e., ``ddCt`` and ``RQ`` have not been called first). 
+ """ try: data[0]['RQ'] except KeyError: @@ -377,6 +675,22 @@ def aggregateResults(data): return res def printDataFrameRQs(RQsummary,effs,outFile): + """Write a tab-delimited summary of RQ results to a file and to stdout. + + Outputs one row per (sample, detector) combination with columns: + Sample, Detector, meanEff, meanRQ, sdevRQ, medianRQ, meandCt, + mediandCt, sdevdCt, quant, ci.l, ci.u. + + The ``quant`` column is efficiency^(-mediandCt); ``ci.l`` and ``ci.u`` + are efficiency^(-(mediandCt +/- sdevdCt)), providing approximate + confidence intervals. + + Args: + RQsummary: Nested dict as returned by ``aggregateResults``. + effs: Dict ``{detector: {'meanEff': float, ...}}`` as returned by + ``summarizeEfficiencies``. + outFile: Path to the output file to write. + """ #Open out Handle outHandle = open(outFile,'w') #Print header row @@ -395,16 +709,58 @@ def printDataFrameRQs(RQsummary,effs,outFile): #TODO:Create R Function to plot output from printDataFramRQs() def plotRQs(results): + """Plot relative quantification (RQ) values. + + Not yet implemented. + + Args: + results: Aggregated results dict as returned by ``aggregateResults``. + """ pass def plotEdCt(results): + """Plot efficiency-corrected delta-Ct (EdCt) values. + + Not yet implemented. + + Args: + results: Aggregated results dict as returned by ``aggregateResults``. + """ pass def doPlotting(plotScript = "plotting.q"): + """Execute an external R plotting script as a subprocess. + + Args: + plotScript: Path to the R script to execute. Defaults to + ``"plotting.q"``. + + Returns: + A tuple ``(status, output)`` as returned by + ``subprocess.getstatusoutput``. + """ return subprocess.getstatusoutput(plotScript) def makeDvsS(results,detectors,samples,value = "mediandCt"): + """Build a detector-by-sample matrix of a chosen summary statistic. + + Creates a 2-D numpy array indexed by detector (rows) and sample + (columns). Missing (sample, detector) combinations are filled with + ``nan``. 
+ + Args: + results: Nested dict ``{sample: {detector: stats_dict}}`` as + returned by ``aggregateResults``. + detectors: Ordered list of detector names defining the row order. + samples: Ordered list of sample names defining the column order. + value: Key within the innermost stats dict to extract. Defaults to + ``"mediandCt"``. + + Returns: + A numpy float array of shape ``(len(detectors), len(samples))`` + containing the requested statistic for each cell. + """ matrix = np.zeros((len(detectors),len(samples)),float) for d in range(0,len(detectors)): for s in range(0,len(samples)): @@ -419,6 +775,17 @@ def makeDvsS(results,detectors,samples,value = "mediandCt"): ############################## def main(mainFile,cycleFile): + """Run the full ABI qPCR analysis pipeline interactively. + + Parses results and cycle-data files, computes efficiencies, + interactively asks the user to select an endogenous control and reference + sample, performs ddCt/RQ calculations, and writes ``output.txt`` before + running the external plotting script. + + Args: + mainFile: Path to the tab-delimited ABI results file. + cycleFile: Path to the tab-delimited cycle fluorescence file. + """ #Parse mainFile print("Parsing Results File...") data = parseData(mainFile) @@ -455,6 +822,17 @@ def main(mainFile,cycleFile): return def test(): + """Run a manual integration test using hard-coded HeLa RIP data files. + + Parses ``'RIP HeLa clipped.txt'`` and ``'new_RIP_HeLa.txt'``, runs the + full ddCt/RQ pipeline with hard-coded endogenous control (``'hGAPDH'``) + and reference sample (``'IgG RIP'``), writes ``output.txt``, and + returns a detector-by-sample matrix of mediandCt values. + + Returns: + A numpy float array of shape ``(n_detectors, n_samples)`` containing + the mediandCt for each (detector, sample) combination. 
+ """ cycleData = parseCycleData('RIP HeLa clipped.txt') cycleData = calculateEfficiencies(cycleData) effs = summarizeEfficiencies(cycleData) diff --git a/src/qpcr/qpcrAnalysis.py b/src/qpcr/qpcrAnalysis.py index 9072c9d..d4f2a15 100644 --- a/src/qpcr/qpcrAnalysis.py +++ b/src/qpcr/qpcrAnalysis.py @@ -1,12 +1,19 @@ #!/usr/bin/env python ''' -Created on Feb 22, 2010 +Core qPCR analysis module using four-parameter logistic modelling and iterative +nonlinear regression for efficiency estimation. + +Provides the ``Well`` class for per-well data storage and curve fitting, along +with standalone functions for parsing raw ABI instrument output, performing +delta-delta Ct (ddCt) relative quantification, and reporting results. + +This module extends the functionality in ``abi.py`` with a more rigorous +curve-fitting approach based on the four-parameter logistic (4PL) model +described in Zhao et al. Requirements: - numpy - - rpy - - R (obviously) - - lattice package (for plotting) + - scipy results.txt input format example (tab-delimited): Well Sample Detector Task Ct Threshold @@ -49,7 +56,37 @@ #Classes ########################## class Well: + """Represents a single PCR well with its raw data and fitted curve parameters. + + Stores metadata (sample name, detector, task, etc.), raw fluorescence + readings keyed by cycle, and all intermediate and final results from + four-parameter logistic curve fitting and crossing-point estimation. + + Attributes: + wellNum: Integer well number (defaults to -1 until populated). + sample: Sample name string. + detector: Detector (primer/probe) name string. + reporter: Reporter dye name string. + task: Task type string (e.g., ``"EndogenousControl"``). + Ct: Threshold cycle value (float). + quantity: Quantity value from ABI output (float). + eff: Amplification efficiency (float). + threshold: Fluorescence threshold (float). + cycles: List of cycle labels from the cycle data file. + fluorData: Numpy array of fluorescence readings per cycle. 
+ flags: Dict of quality-flag name/value pairs parsed from the ABI file. + RNoise: Standard error of the baseline fluorescence parameter (y0) + from the fitted 4PL model; None until ``fitPCRCurve`` is called. + """ + def __init__(self,line): + """Initialise a Well with default empty values. + + Args: + line: The raw text line from the ABI file used to create this + well (stored for reference but not parsed here; parsing is + done by ``parseRawABI``). + """ self.wellNum = -1 self.sample = '' self.detector = '' @@ -65,12 +102,42 @@ def __init__(self,line): self.RNoise = None def estimateParams(self): + """Generate initial parameter guesses for the four-parameter logistic model. + + Estimates starting values for the curve-fitting routine based on + simple statistics of the raw fluorescence data: + + - ``y0``: mean of the first five cycles (baseline fluorescence). + - ``x0``: cycle nearest the midpoint fluorescence (inflection point). + - ``a``: dynamic range (max minus min fluorescence). + - ``b``: set to 0 (the optimiser handles this parameter well without + a manual initial estimate). + + Populates the instance attributes ``y0``, ``x0``, ``a``, and ``b`` + in-place. + """ self.y0 = np.mean(self.fluorData[:5]) # Initial guess as to baseline fluorescence (mean of first five cycles) self.x0 = self.cycles[np.argmin(abs(self.fluorData-np.mean(self.fluorData)))] # Initial guess as to inflection point at middle of curve self.a = (np.max(self.fluorData)-np.min(self.fluorData))# Initial guess as to y value at inflection self.b = 0 # Don't think I need to estimate this parameter, model seems to do a good job of fitting this one. def fitPCRCurve(self): + """Fit the four-parameter logistic (4PL) model to the fluorescence data. + + Calls ``scipy.optimize.curve_fit`` with ``qpcrFit`` as the model + function and up to 5000 function evaluations. After fitting, + updates the instance attributes: + + - ``a``, ``b``, ``x0``, ``y0``: fitted model parameters. 
+ - ``pCov``: covariance matrix of the fitted parameters. + - ``fitData``: list of model-predicted fluorescence values at each + cycle. + - ``paramSE``: dict mapping parameter names (``'a'``, ``'b'``, + ``'x0'``, ``'y0'``) to their standard errors (sqrt of the + diagonal of ``pCov``). + - ``RNoise``: standard error of the ``y0`` parameter, used as an + estimate of baseline noise. + """ #Fit qpcr Model newParams,self.pCov = optimize.curve_fit(qpcrFit,xdata=self.cycles,ydata=self.fluorData,maxfev=5000) #Update params @@ -87,18 +154,54 @@ def fitPCRCurve(self): return def CP_FDM(self): + """Compute the crossing-point by the First Derivative Maximum (FDM) method. + + Calculates the cycle number at which the first derivative of the + fitted 4PL curve is maximised, stored in ``self.FDM``. + + Returns: + The FDM crossing-point cycle number as a float. + """ self.FDM = (self.x0*nthRoot(((self.b-1)/(self.b+1)),self.b)) return self.FDM def CP_SDM(self): + """Compute the crossing-point by the Second Derivative Maximum (SDM) method. + + Calculates the cycle number at which the second derivative of the + fitted 4PL curve is maximised, stored in ``self.SDM``. + + Returns: + The SDM crossing-point cycle number as a float. + """ self.SDM = self.x0*nthRoot((np.sqrt((3*self.b**2)*(self.b**2-1))-(2*(1-self.b**2)))/((self.b**2)+(3*self.b)+2),self.b) return self.SDM def CP_SPE(self): + """Compute the crossing-point by the Signal-to-Noise method (SPE). + + Calculates the cycle at which the fluorescence signal exceeds the + baseline noise by a factor of ``a / RNoise``, stored in ``self.SPE``. + Requires that ``fitPCRCurve`` has been called so that ``RNoise`` is + available. + + Returns: + The SPE crossing-point cycle number as a float. + """ self.SPE = (self.x0*nthRoot(((self.a-self.RNoise)/self.RNoise),self.b)) return self.SPE def iterativeNLR(self): + """Perform iterative nonlinear regression over the exponential phase window. 
+ + Uses the SPE and SDM crossing-point estimates to define the lower and + upper cycle boundaries of the exponential phase. Enumerates all + sub-windows of size >= ``windowSize`` within that range using + combinatorics and stores the window indices in ``self.winIdx``. + + Requires that ``CP_SPE`` and ``CP_SDM`` have been called first to + populate ``self.SPE`` and ``self.SDM``. + """ self.lowerCycleNum = int(self.SPE) self.upperCycleNum = int(self.SDM) self.regPoints = self.upperCycleNum-self.lowerCycleNum+1 @@ -115,7 +218,21 @@ def iterativeNLR(self): #Parsing ########################## def parseRawABI(fname): - """This replaces parseData""" + """Parse a raw ABI results file into a dict of Well objects keyed by well number. + + Replaces the simpler ``parseData`` function. Handles the multi-section ABI + export format: skips the first line, collects key/value header metadata, + then reads data rows until EOF. Rows with an ``"Undetermined"`` Ct value + are skipped. Quality-flag columns (indices 17 onwards) are stored in each + ``Well.flags`` dict. + + Args: + fname: Path to the raw ABI tab-delimited results export file. + + Returns: + A dict ``{well_number (int): Well}`` for every well with a valid + numeric Ct value. + """ dictKeys = ['well','sample','detector','reporter','task','Ct','quantity','Qty Mean','Qty StdDev','Ct Median','Ct Mean','Ct StdDev','Baseline Type','Baseline Start','Baseline Stop','Threshold Type','threshold','FOS','HMD','LME','EW','BPR','NAW','HNS','HRN','EAF','BAF','TAF','CAF'] handle = open(fname,'r') header = {} @@ -171,7 +288,18 @@ def parseRawABI(fname): assert False, "Should not reach this line..." def parseRawCycle(fname,wellData): - """This replaces parseCycleData""" + """Parse a raw ABI cycle fluorescence file and populate the matching Well objects. + + Replaces the simpler ``parseCycleData`` function. 
Reads fluorescence + readings up to (but not including) the ``"Delta Rn"`` column and writes + ``cycles`` and ``fluorData`` directly onto the corresponding ``Well`` + objects in ``wellData``. + + Args: + fname: Path to the raw ABI cycle data tab-delimited export file. + wellData: Dict ``{well_number: Well}`` as returned by + ``parseRawABI``. Modified in-place. + """ handle = open(fname,'r') handle.readline()#Remove first line headerRow = handle.readline() @@ -187,12 +315,33 @@ def parseRawCycle(fname,wellData): return def getDetAndSamp(wellData): - """Returns two lists of unique detectors and unique samples""" + """Return lists of unique detector and sample names from a collection of Well objects. + + Uses ``util.uniqify`` to deduplicate; result order is not guaranteed to + be preserved (depends on dict key ordering). + + Args: + wellData: An iterable of ``Well`` objects (e.g., the values of the + dict returned by ``parseRawABI``). + + Returns: + A tuple ``(detectors, samples)`` where each element is a list of + unique string names. + """ detectors = util.uniqify(detectors = [x.detector for x in wellData]) samples = util.uniqify(samples = [x.sample for x in wellData]) return detectors,samples def wellIndex(data): + """Build a list of well numbers in the same order as the data list. + + Args: + data: List of well dicts, each containing a ``well`` key. + + Returns: + A list of integer well numbers corresponding positionally to each + entry in ``data``. + """ index = [] for i in range(len(data)): index.append(data[i]['well']) @@ -202,6 +351,17 @@ def wellIndex(data): #Get User Input ###################### def getEndoControl(detectors): + """Interactively prompt the user to select an endogenous control detector. + + Prints a numbered list of detector names and reads an integer choice from + standard input. + + Args: + detectors: List of detector name strings to present to the user. + + Returns: + The detector name string chosen by the user. 
+ """ myString = "Please choose an endogenous control:\n" for i in range(0,len(detectors)): myString = myString+"\t(%d):\t%s\n" % (i,detectors[i]) @@ -210,6 +370,17 @@ def getEndoControl(detectors): return detectors[choice] def getReference(samples): + """Interactively prompt the user to select a reference sample. + + Prints a numbered list of sample names and reads an integer choice from + standard input. + + Args: + samples: List of sample name strings to present to the user. + + Returns: + The sample name string chosen by the user. + """ myString = "Please choose a reference sample:\n" for i in range(0,len(samples)): myString = myString + "\t(%d):\t%s\n" % (i,samples[i]) @@ -222,6 +393,20 @@ def getReference(samples): ##################################### def aggregateReplicateCts(data): + """Aggregate replicate Ct values per sample/detector pair using the median. + + Groups raw per-well Ct values by (sample, detector) and computes the + median Ct for each combination. ``"N/A"`` values (from undetermined wells + that slipped through) are silently dropped by the ``median`` helper. + + Args: + data: List of well dicts, each containing ``sample``, ``detector``, + and ``Ct`` keys. + + Returns: + A nested dict ``{sample: {detector: median_Ct}}`` where each value + is the median Ct (float or ``"N/A"`` if all replicates are missing). + """ #This will have to change... #TODO: make this aggregate either Ct values or N0 values? tmp = {} @@ -244,48 +429,142 @@ def aggregateReplicateCts(data): ##################################### def getLogVals(myArray): + """Return the base-10 logarithm of each element in a numpy array. + + Args: + myArray: A numpy array of positive numeric values. + + Returns: + A numpy array of the same shape containing log10 of each input value. + """ return np.log10(myArray) ######### # Four-parameter Logistic Model fitting ######### def nthRoot(num,n): + """Compute the nth root of a number. + + Args: + num: The base value (numeric). 
+ n: The root degree (numeric, must not be zero). + + Returns: + ``num ** (1.0 / n)`` as a float. + """ return num ** (1.0/n) def qpcrFit(x,a,b,x0,y0): - """Same as fit but designed to run with optimize.curve_fit""" + """Evaluate the four-parameter logistic (4PL) model for qPCR fluorescence data. + + Implements the model from Zhao et al.: + f(x) = y0 + a / (1 + (x / x0)^b) + + Designed for use with ``scipy.optimize.curve_fit``. + + Args: + x: Cycle number (scalar or array). + a: Amplitude parameter (difference between upper and lower + asymptotes). + b: Slope/steepness parameter. + x0: Inflection point (cycle at the midpoint of the curve). + y0: Baseline fluorescence (lower asymptote). + + Returns: + Predicted fluorescence value(s) at cycle ``x``. + """ return (y0+(a/(1+((x/x0)**b)))) def qpcrFitResiduals(x,y,a,b,x0,y0): - """ - Residuals: - errfunc = lambda p,x,y: y-fitfunc(p,x) #Distance to the target function (residuals) + """Compute residuals between observed fluorescence and the 4PL model. + + Calculates ``y - qpcrFit(x, a, b, x0, y0)``. + + Args: + x: Cycle number(s) (scalar or array). + y: Observed fluorescence value(s). + a: Amplitude parameter. + b: Slope/steepness parameter. + x0: Inflection point (cycle at midpoint). + y0: Baseline fluorescence (lower asymptote). + + Returns: + Residual value(s) ``y - predicted``. """ return y-qpcrFit(x,a,b,x0,y0) def CP_FDM(p): + """Compute the crossing-point using the First Derivative Maximum (FDM) method. + + Args: + p: Sequence of four fitted 4PL parameters ``[a, b, x0, y0]``. + + Returns: + The FDM crossing-point cycle number as a float. + """ return (p[2]*nthRoot(((p[1]-1)/(p[1]+1)),p[1])) def CP_SDM(p): + """Compute the crossing-point using the Second Derivative Maximum (SDM) method. + + Args: + p: Sequence of four fitted 4PL parameters ``[a, b, x0, y0]``. + + Returns: + The SDM crossing-point cycle number as a float. 
+ """ return p[2]*nthRoot((np.sqrt((3*p[1]**2)*(p[1]**2-1))-(2*(1-p[1]**2)))/((p[1]**2)+(3*p[1])+2),p[1]) def CP_SPE(p,rNoise): + """Compute the crossing-point using the Signal-to-Noise (SPE) method. + + Args: + p: Sequence of four fitted 4PL parameters ``[a, b, x0, y0]``. + rNoise: Baseline noise estimate (standard error of the ``y0`` + parameter, i.e., ``RNoise``). + + Returns: + The SPE crossing-point cycle number as a float. + """ return (p[2]*nthRoot(((p[0]-rNoise)/rNoise),p[1])) ############################### #Iterative Nonlinear Regression ############################### def nlmFit(x,a,b,y0): - """ - Non-linear regression function to optimize for windows in exponential phase - here p = [a,b,y0] + """Evaluate the exponential nonlinear regression model for the exponential phase. + + Models the exponential amplification phase as: + f(x) = y0 + a * (b ^ x) + + Used for iterative nonlinear regression (iNLR) on windows within the + exponential phase. Parameters are ``[a, b, y0]``. + + Args: + x: Cycle number (scalar or array). + a: Amplitude scaling factor. + b: Per-cycle amplification factor (related to efficiency: b ~ E). + y0: Baseline offset. + + Returns: + Predicted fluorescence value(s) at cycle ``x``. """ return y0+(a*(b**x)) def nlmFitResiduals(x,y,a,b,y0): - """ - Residuals: - errfunc = lambda p,x,y: y-nlmFit(p,x) #Distance to the target function (residuals) + """Compute residuals between observed fluorescence and the exponential NLM model. + + Calculates ``y - nlmFit(x, a, b, y0)``. + + Args: + x: Cycle number(s) (scalar or array). + y: Observed fluorescence value(s). + a: Amplitude scaling factor. + b: Per-cycle amplification factor. + y0: Baseline offset. + + Returns: + Residual value(s) ``y - predicted``. """ return y-nlmFit(x,a,b,y0) @@ -294,6 +573,30 @@ def nlmFitResiduals(x,y,a,b,y0): #ddCt math ############################### def ddCt(data,medianCts,endoControl,reference): + """Compute delta-Ct and delta-delta-Ct values for each well. 
+ + For each well, dCt is calculated as: + dCt = Ct - median_Ct(sample, endoControl) + + If the endogenous control Ct is unavailable for a sample, dCt is set to + ``"N/A"``. ddCt is then calculated as: + ddCt = dCt - median_dCt(reference, detector) + + If the reference dCt is unavailable, ddCt is set to ``"N/A"``. + + Args: + data: List of well dicts, each containing ``sample``, ``detector``, + and ``Ct`` keys. + medianCts: Nested dict ``{sample: {detector: median_Ct}}`` as + returned by ``aggregateReplicateCts``. + endoControl: Name of the endogenous control detector to use for + normalization. + reference: Name of the reference sample to use for ddCt calculation. + + Returns: + The ``data`` list with ``dCt`` and ``ddCt`` keys added to each well + dict (values are floats or ``"N/A"``). + """ tmp = {} #Calculate dCts for i in range(len(data)): @@ -324,9 +627,37 @@ def ddCt(data,medianCts,endoControl,reference): return data def JohnsMethod(data,medianCts,endoControl,reference): + """Placeholder for an alternative relative quantification method. + + Not yet implemented. + + Args: + data: List of well dicts. + medianCts: Nested dict of median Ct values per sample/detector. + endoControl: Name of the endogenous control detector. + reference: Name of the reference sample. + """ pass def RQ(data,effs): + """Calculate relative quantification (RQ) values for each well. + + RQ is computed as: + RQ = meanEfficiency ^ (-ddCt) + + Wells with a ``"N/A"`` ddCt or a missing efficiency entry receive + ``"N/A"`` for RQ. + + Args: + data: List of well dicts containing ``detector`` and ``ddCt`` keys, + as returned by ``ddCt``. + effs: Dict ``{detector: {'meanEff': float, ...}}`` as returned by + ``summarizeEfficiencies``. + + Returns: + The ``data`` list with an ``RQ`` key added to each well dict + (float or ``"N/A"``). 
+ """ res = [] for d in data: try: @@ -344,7 +675,14 @@ def RQ(data,effs): ############################### def mean(vals): - """Computes the mean of a list of numbers""" + """Compute the arithmetic mean of a list of numbers. + + Args: + vals: An iterable of numeric values. + + Returns: + The arithmetic mean as a float. + """ n = 0 s = 0.0 for i in vals: @@ -353,7 +691,19 @@ def mean(vals): return s / float(n) def median(vals): - """Computes the median of a list of numbers""" + """Compute the median of a list, ignoring any ``"N/A"`` sentinel values. + + Filters out ``"N/A"`` entries before sorting. Sorts the remaining values + in-place. Returns ``"N/A"`` if no numeric values remain after filtering. + + Args: + vals: A list that may contain numeric values and/or the string + ``"N/A"``. + + Returns: + The median numeric value as a float, or the string ``"N/A"`` if all + values are ``"N/A"``. + """ print(vals) vals = [i for i in vals if i != "N/A"] print(vals) @@ -367,17 +717,46 @@ def median(vals): return vals[lenvals // 2] def variance(vals): - """Variance""" + """Compute the sample variance of a list of numbers. + + Uses Bessel's correction (divides by N-1). + + Args: + vals: A list of numeric values with at least two elements. + + Returns: + The sample variance as a float. + """ u = mean(vals) return sum((x - u)**2 for x in vals) / float(len(vals)-1) def sdev(vals): - """Standard deviation""" + """Compute the sample standard deviation of a list of numbers. + + Returns 0.0 for lists with one or fewer elements. + + Args: + vals: A list of numeric values. + + Returns: + The sample standard deviation as a float. + """ if len(vals) <=1: return 0.0 return math.sqrt(variance(vals)) def covariance(lst1, lst2): - """Covariance""" + """Compute the sample covariance between two equal-length lists. + + Uses Bessel's correction (divides by N-1). + + Args: + lst1: First list of numeric values. + lst2: Second list of numeric values; must be the same length as + ``lst1``. 
+ + Returns: + The sample covariance as a float. + """ m1 = mean(lst1) m2 = mean(lst2) tot = 0.0 @@ -386,7 +765,21 @@ def covariance(lst1, lst2): return tot / (len(lst1)-1) def corr(lst1, lst2): - """Pearson's Correlation""" + """Compute the Pearson correlation coefficient between two lists. + + Returns a very large number (1e1000) when the denominator is zero + (i.e., one or both lists have zero variance), used as a sentinel for + a perfect linear relationship in the sliding-window search. + + Args: + lst1: First list of numeric values. + lst2: Second list of numeric values; must be the same length as + ``lst1``. + + Returns: + The Pearson correlation coefficient as a float, or 1e1000 when the + standard deviation of either list is zero. + """ num = covariance(lst1, lst2) denom = float(sdev(lst1) * sdev(lst2)) if denom != 0: @@ -395,13 +788,38 @@ def corr(lst1, lst2): return 1e1000 def slope(xarray,yarray): - """Uses numpy, in fact assumes that the list arguments are numpy arrays.""" + """Compute the ordinary least-squares regression slope. + + Uses the standard closed-form formula. Requires numpy arrays because + element-wise multiplication (``xarray * yarray``) and vectorized + ``sum`` are used. + + Args: + xarray: Numpy array of independent variable values. + yarray: Numpy array of dependent variable values; must be the same + length as ``xarray``. + + Returns: + The regression slope as a float. + """ n = float(len(xarray)) m = (n*sum(xarray*yarray)-sum(xarray)*sum(yarray))/(n*sum(xarray**2)-(sum(xarray))**2) return m def intercept(xarray,yarray): - """Uses numpy, in fact assumes that the list arguments are numpy arrays.""" + """Compute the ordinary least-squares regression intercept. + + Uses the standard closed-form formula given the slope. Requires numpy + arrays because vectorized ``sum`` is used. + + Args: + xarray: Numpy array of independent variable values. + yarray: Numpy array of dependent variable values; must be the same + length as ``xarray``. 
+ + Returns: + The regression intercept (y-axis) as a float. + """ m = slope(xarray,yarray) n = float(len(xarray)) b = (sum(yarray)-m*(sum(xarray)))/n @@ -412,9 +830,35 @@ def intercept(xarray,yarray): ############################### def flagBadDetectors(): + """Flag detectors with poor amplification characteristics. + + Not yet implemented. + """ pass def aggregateResults(data): + """Aggregate per-well RQ, N0, and dCt values into per-(sample, detector) summaries. + + Computes mean, median, and standard deviation of RQ, dCt, and N0 + for every (sample, detector) combination across all replicate wells. + Wells with ``"N/A"`` RQ are excluded from RQ and dCt summaries; N0 + is always summarised. + + Args: + data: List of well dicts containing ``sample``, ``detector``, + ``RQ``, ``N0``, and ``dCt`` keys, as returned by ``RQ``. + + Returns: + A nested dict ``{sample: {detector: stats_dict}}`` where + ``stats_dict`` contains the keys: ``medianRQ``, ``meanRQ``, + ``sdevRQ``, ``mediandCt``, ``meandCt``, ``sdevdCt``, + ``medianN0``, ``meanN0``, ``sdevN0``. Unavailable values are + represented as ``"N/A"``. + + Raises: + KeyError: If ``RQ`` values have not yet been computed on the data + (i.e., ``ddCt`` and ``RQ`` have not been called first). + """ try: data[0]['RQ'] except KeyError: @@ -478,6 +922,22 @@ def aggregateResults(data): return res def printDataFrameRQs(RQsummary,effs,outFile): + """Write a tab-delimited summary of RQ results to a file and to stdout. + + Outputs one row per (sample, detector) combination with columns: + Sample, Detector, meanEff, meanRQ, sdevRQ, medianRQ, meandCt, + mediandCt, sdevdCt, quant, ci.l, ci.u. + + The ``quant`` column is efficiency^(-mediandCt); ``ci.l`` and ``ci.u`` + are efficiency^(-(mediandCt +/- sdevdCt)), providing approximate + confidence intervals. + + Args: + RQsummary: Nested dict as returned by ``aggregateResults``. + effs: Dict ``{detector: {'meanEff': float, ...}}`` as returned by + ``summarizeEfficiencies``. 
+ outFile: Path to the output file to write. + """ #Open out Handle outHandle = open(outFile,'w') #Print header row @@ -496,16 +956,58 @@ def printDataFrameRQs(RQsummary,effs,outFile): #TODO:Create R Function to plot output from printDataFramRQs() def plotRQs(results): + """Plot relative quantification (RQ) values. + + Not yet implemented. + + Args: + results: Aggregated results dict as returned by ``aggregateResults``. + """ pass def plotEdCt(results): + """Plot efficiency-corrected delta-Ct (EdCt) values. + + Not yet implemented. + + Args: + results: Aggregated results dict as returned by ``aggregateResults``. + """ pass def doPlotting(plotScript = "qPCRPlotting.q"): + """Execute an external R plotting script as a subprocess. + + Args: + plotScript: Path to the R script to execute. Defaults to + ``"qPCRPlotting.q"``. + + Returns: + A tuple ``(status, output)`` as returned by + ``subprocess.getstatusoutput``. + """ return subprocess.getstatusoutput(plotScript) def makeDvsS(results,detectors,samples,value = "mediandCt"): + """Build a detector-by-sample matrix of a chosen summary statistic. + + Creates a 2-D numpy array indexed by detector (rows) and sample + (columns). Missing (sample, detector) combinations are filled with + ``nan``. + + Args: + results: Nested dict ``{sample: {detector: stats_dict}}`` as + returned by ``aggregateResults``. + detectors: Ordered list of detector names defining the row order. + samples: Ordered list of sample names defining the column order. + value: Key within the innermost stats dict to extract. Defaults to + ``"mediandCt"``. + + Returns: + A numpy float array of shape ``(len(detectors), len(samples))`` + containing the requested statistic for each cell. 
+ """ matrix = np.zeros((len(detectors),len(samples)),float) for d in range(0,len(detectors)): for s in range(0,len(samples)): @@ -520,6 +1022,17 @@ def makeDvsS(results,detectors,samples,value = "mediandCt"): ############################## def main(mainFile,cycleFile): + """Run the full qPCR analysis pipeline interactively. + + Parses results and cycle-data files using the raw ABI format parsers, + computes efficiencies, interactively asks the user to select an endogenous + control and reference sample, performs ddCt/RQ calculations, writes + ``output.txt``, and runs the external plotting script. + + Args: + mainFile: Path to the raw ABI tab-delimited results export file. + cycleFile: Path to the raw ABI cycle fluorescence export file. + """ #Parse mainFile print("Parsing Results File...") data = parseRawABI(mainFile) @@ -556,6 +1069,17 @@ def main(mainFile,cycleFile): return def test(): + """Run a manual integration test using hard-coded HeLa RIP data files. + + Parses ``'RIP HeLa clipped.txt'`` and ``'new_RIP_HeLa.txt'``, runs the + full ddCt/RQ pipeline with hard-coded endogenous control (``'hGAPDH'``) + and reference sample (``'IgG RIP'``), writes ``output.txt``, and + returns a detector-by-sample matrix of mediandCt values. + + Returns: + A numpy float array of shape ``(n_detectors, n_samples)`` containing + the mediandCt for each (detector, sample) combination. + """ cycleData = parseCycleData('RIP HeLa clipped.txt') cycleData = calculateEfficiencies(cycleData) effs = summarizeEfficiencies(cycleData) diff --git a/src/qpcr/util.py b/src/qpcr/util.py index 70bff2d..552cf53 100644 --- a/src/qpcr/util.py +++ b/src/qpcr/util.py @@ -1,11 +1,23 @@ ''' -Created on Sep 2, 2010 +Miscellaneous utility functions for the qpcr package. @author: lgoff ''' #Misc Tools and Utilities def uniqify(seq): + """Return a list of unique elements from a sequence. + + Deduplicates by inserting elements into a dict. 
The returned order is + not guaranteed to be the same as the input order (depends on dict + insertion-order behaviour of the Python version). + + Args: + seq: Any iterable of hashable elements. + + Returns: + A list containing each unique element from ``seq`` exactly once. + """ # Not order preserving keys = {} for e in seq: diff --git a/src/seqlib/Alignment.py b/src/seqlib/Alignment.py index 0640a86..57e87c9 100644 --- a/src/seqlib/Alignment.py +++ b/src/seqlib/Alignment.py @@ -1,18 +1,47 @@ -''' -Created on Jun 30, 2009 +"""Short RNA read alignment data structure. -@author: lgoff -''' +Provides the Alignment class for representing a single short-read alignment, +with methods for strand testing, BED output, and conversion to intervallib +Interval objects. + +Originally created on Jun 30, 2009. + +Author: lgoff +""" from . import misc from .intervallib import * class Alignment(object): - """ - Basic Alignment class for short RNA reads - Can be avoided directly in favor of aligner-specific implementations (ie. ShrimpRead and/or MAQRead) + """Basic alignment class for short RNA reads. + + Can be bypassed in favour of aligner-specific implementations such as + ShrimpRead or MAQRead. Supports score-based sorting (higher scores sort + first) and conversion to BED or Interval format. + + Attributes: + readname: Name/identifier of the aligned read. + chr: Chromosome name. + start: 0-based start coordinate. + end: End coordinate. + strand: Strand orientation ("+" or "-"). + score: Alignment score (float). + readsequence: DNA sequence of the read. + readcount: Integer read count (-1 if unset). """ def __init__(self,readname,chr,start,end,strand,score=0,readcount = -1,readsequence=''): + """Initialize an Alignment. + + Args: + readname: Name/identifier of the read. + chr: Chromosome name string. + start: Start coordinate (converted to int). + end: End coordinate (converted to int). + strand: Strand string ("+" or "-"). 
+ score: Alignment score (default 0, converted to float). + readcount: Read count integer (default -1). + readsequence: DNA sequence string of the read (default ""). + """ self.readname = str(readname) self.chr = chr self.start = int(start) @@ -23,34 +52,63 @@ def __init__(self,readname,chr,start,end,strand,score=0,readcount = -1,readseque self.readcount = readcount def __lt__(self, b): + """Compare by score in descending order (higher scores sort first).""" return self.score > b.score # reversed because original was -cmp(self.score, b.score) def __eq__(self, b): + """Return True if self and b have the same score.""" return self.score == b.score def __str__(self): + """Return a readname:chr:start:end string.""" return "%s:%s:%d:%d" % (self.readname,self.chr,self.start,self.end) def __repr__(self): + """Return a readname:chr:start:end string.""" return "%s:%s:%d:%d" % (self.readname,self.chr,self.start,self.end) def __len__(self): + """Return the length of the alignment in bases (end - start + 1).""" return self.end-self.start+1 def isPlus(self): + """Return True if the alignment is on the "+" strand. + + Returns: + True if self.strand == "+", otherwise False. + """ if self.strand=="+": return True else: return False def isMinus(self): + """Return True if the alignment is on the "-" strand. + + Returns: + True if self.strand == "-", otherwise False. + """ if self.strand=="-": return True else: return False def toInterval(self): + """Convert this alignment to an intervallib.Interval. + + Returns: + An Interval with the same coordinates, score, readcount, and + readname as this alignment. + """ return Interval(self.chr,self.start,self.end,self.strand,self.score,self.readcount,name=self.readname) def toBed(self): + """Return a BED-formatted string for this alignment. + + The name field is encoded using misc.seq2nuID applied to the read + sequence. + + Returns: + Tab-delimited BED line string with a trailing newline. 
+ """ return ("%s\t%d\t%d\t%s\t%d\t%s\n" % (self.chr,self.start,self.end,misc.seq2nuID(self.readsequence),self.readcount,self.strand)) diff --git a/src/seqlib/Chip.py b/src/seqlib/Chip.py index fcf4863..5374845 100644 --- a/src/seqlib/Chip.py +++ b/src/seqlib/Chip.py @@ -1,6 +1,12 @@ ''' +Tools for working with NimbleGen ChIP-chip tiling array data. + +Provides an interval class with tiling-array-specific methods, parsers for +NimbleGen GFF output files, interval-merging utilities, and statistical +helpers for identifying enriched regions via permutation-based p-value +estimation — following the approach of Guttman et al. + Created on Jul 6, 2009 -This module will attempt to deal with the nimblegen array data in a similar mechanism to that achieved by Guttman et al. @author: lgoff ''' @@ -19,39 +25,103 @@ class ChipInterval(Interval): - """Extends basic Interval class with Tiling array methods and attributes""" + """Genomic interval extended with tiling-array probe-hierarchy support. + + Extends the basic Interval class with parent/child relationships so + that individual NimbleGen probes (children) can be grouped under a + merged enriched region (parent), and provides methods for computing + coverage maps and plots from the probe scores. + + Attributes: + parents: List of ChipInterval objects that contain this interval. + children: List of ChipInterval objects contained within this + interval (e.g. individual probes belonging to an enriched + region). + """ def __init__(self, chr, start, end, strand="*", score=0.0, readcount = -1,name="",sequence = "",data={}): + """Initialise a ChipInterval. + + Args: + chr: Reference sequence name / chromosome. + start: Start coordinate of the interval. + end: End coordinate of the interval. + strand: Strand indicator; defaults to '*' (unstranded). + score: Probe or enrichment score; defaults to 0.0. + readcount: Number of reads/probes; defaults to -1 (unset). + name: Optional label for the interval; defaults to ''. 
+ sequence: Optional genomic sequence; defaults to ''. + data: Optional dict of additional attributes; defaults to {}. + """ Interval.__init__(self, chr, start, end, strand=strand, score=score, readcount = readcount,name=name,sequence = sequence,data=data) self.parents = [] self.children = [] def addChild(self, child): - """Adds child node to self.children""" + """Add a child interval to this interval's children list. + + The child is only added if it is not already present. A back- + reference from the child to this interval is added to + ``child.parents``. + + Args: + child: A ChipInterval to add as a child of this interval. + """ #assert child not in self.children if child not in self.children: child.parents.append(self) self.children.append(child) def removeChild(self, child): - """Removes child node from self.children (not sure how or if this works. Don't trust it yet)""" + """Remove a child interval from this interval's children list. + + Also removes the corresponding back-reference from ``child.parents``. + The correctness of this method has not been fully verified. + + Args: + child: The ChipInterval to remove from ``self.children``. + """ child.parents.remove(self) self.children.remove(child) def childScores(self): - """Returns list of scores for each interval in self.children""" + """Return the score attribute of each child interval. + + Returns: + A list of score values, one per element in ``self.children``, + in the same order as ``self.children``. + """ return [x.score for x in self.children] def childAvg(self): - """Empty""" + """Placeholder for computing the average score across child intervals. + + Not yet implemented. + """ pass def childMedian(self): - """Empty""" + """Placeholder for computing the median score across child intervals. + + Not yet implemented. + """ pass def makeValMap(self,value = 'readcount'): - """Check these two to see which one is right...""" + """Build a per-base value map by averaging child interval attributes. 
+ + Creates ``self.valMap``, a numpy array of length ``len(self)`` + initialised to -1. For each base position covered by at least one + child interval the stored value is the mean of the specified + attribute across all children that cover that base. + + Note: An alternative implementation exists in a commented-out block + in the source; both approaches are noted as unverified. + + Args: + value: Name of the attribute on each child ChipInterval whose + values are averaged. Defaults to ``'readcount'``. + """ self.valMap = np.zeros(len(self)) self.valMap = self.valMap-1 myTmp = [] @@ -96,7 +166,13 @@ def makeValMap(self): """ def plotVals(self): - """Creates a line plot (via rpy2) across all bases within interval of the scores from self.valMap for the given base""" + """Plot probe scores across this interval using rpy2. + + Opens an X11 window and draws a step-style line plot. Each child + probe is drawn as a horizontal segment at its score level spanning + its start to end coordinates. If ``self.valMap`` has not yet been + computed, ``makeValMap`` is called automatically. + """ if 'valMap' not in self.__dict__: self.makeValMap() robjects.r.x11() @@ -106,7 +182,10 @@ def plotVals(self): robjects.r.lines((x.start,x.end),(x.score,x.score),lwd=2) def plot(self): - """Convenience wrapper for self.plotVals""" + """Convenience wrapper that calls plotVals to display the interval. + + Equivalent to calling ``self.plotVals()`` directly. + """ self.plotVals() # def uniqifySig(self): @@ -116,6 +195,25 @@ def plot(self): # self.significant = keys.keys() def scan(self,permuted,windowSize,threshold): + """Scan child probes with a sliding window to identify significant regions. + + Sorts ``self.children`` in place and slides a window of + ``windowSize`` probes across them. For each window, computes the + mean probe score and compares it against a pre-computed permutation + distribution to obtain an empirical p-value. 
Probes in windows + whose p-value is at or below ``threshold`` are added to + ``self.significant``. + + Args: + permuted: A dict keyed by window size whose values are numpy + arrays of maximum-window-mean values from permuted data (as + produced by ``getRandomDist``). The key ``windowSize`` must + be present. + windowSize: Number of consecutive probes in each sliding window. + threshold: Maximum empirical p-value (proportion of permuted + values >= observed mean) for a window to be considered + significant. + """ self.children.sort() if 'significant' not in self.__dict__: self.significant = [] @@ -132,8 +230,27 @@ def scan(self,permuted,windowSize,threshold): #This should be deleted... class ChipData(object): - """Container for one array's worth of NimbleGen data""" + """Container for one NimbleGen array's worth of probe data. + + Deprecated — this class is marked for deletion in the source. + + Parses a NimbleGen GFF file on construction and organises the resulting + ChipInterval probe objects by chromosome. + + Attributes: + fname: Path to the NimbleGen GFF file that was parsed. + sampleName: Human-readable label for this sample. + probeData: Dict mapping chromosome name to a list of ChipInterval + objects for probes on that chromosome. + """ + def __init__(self, fname, sampleName): + """Initialise a ChipData container by parsing a NimbleGen GFF file. + + Args: + fname: Path to the NimbleGen GFF output file to parse. + sampleName: Label for this array sample. + """ self.fname = fname self.sampleName = sampleName self.probeData = {} @@ -146,19 +263,45 @@ def __init__(self, fname, sampleName): self.probeData[ci.chr].append(ci) def sort(self): - """Sorts all chromosomes seperately and in place""" + """Sort probe lists for all chromosomes in place. + + Iterates over ``self.data`` (note: the attribute populated on + construction is ``self.probeData``; this method references + ``self.data`` which may not exist). 
+ """ for k in self.data.keys(): self.data[k].sort() def shuffle(self,chr): - """This doesn't work yet""" + """Shuffle probe scores for a chromosome in place. + + Note: This method is not yet correctly implemented — ``random.shuffle`` + operates on the temporary ``vals`` list and does not modify + ``self.probeData``. + + Args: + chr: Chromosome key to look up in ``self.probeData``. + + Returns: + None (``random.shuffle`` always returns None). + """ vals = [x.score for x in self.probeData[chr]] return random.shuffle(vals) #End crap def nimblegenIter(fname): - """Returns a generator of ChipInterval objects from a nimblegen .GFF output file""" + """Yield ChipInterval objects parsed from a NimbleGen GFF output file. + + Skips comment lines (starting with '#') and extracts chromosome, + start, end, score, and probe name from each data row. + + Args: + fname: Path to a NimbleGen GFF file. + + Yields: + ChipInterval objects, one per non-comment line in the file. + """ handle = open(fname,'r') for line in handle: if line.startswith("#"): continue @@ -167,6 +310,18 @@ def nimblegenIter(fname): yield ChipInterval(tokens[0],tokens[3],tokens[4],score=tokens[5],name=pname) def parseNimblegen(fname): + """Parse an entire NimbleGen GFF file into a list of ChipInterval objects. + + Convenience wrapper around ``nimblegenIter`` that collects all intervals + into a list rather than lazily yielding them. + + Args: + fname: Path to a NimbleGen GFF file. + + Returns: + A list of ChipInterval objects, one per non-comment line in the + file. + """ iter = nimblegenIter(fname) rtrn = [] for i in iter: @@ -174,8 +329,30 @@ def parseNimblegen(fname): return rtrn def joinNimblegenIntervals(intervals,start='start',end='end',offset=1000): - """ - Returns a list of independent transcription units overlaping by offset + """Merge overlapping NimbleGen probe intervals into enriched regions. 
+ + Sorts the probe list and iterates through it, merging any probes that + intersect (with optional extension by ``offset``) into a single + ChipInterval. Each merged interval stores its constituent probes as + children and resets its name and score. + + Returns the input list unchanged if it is empty. + + Args: + intervals: A list of ChipInterval objects (typically from + ``parseNimblegen``). The list is sorted in place. + start: Attribute name used as the start coordinate when testing + for intersection. Defaults to ``'start'``. + end: Attribute name used as the end coordinate when testing for + intersection. Defaults to ``'end'``. + offset: Number of bases by which each interval is extended before + testing for overlap, effectively merging probes within this + distance. Defaults to 1000. + + Returns: + A list of merged ChipInterval objects representing independent + enriched regions, each with a ``children`` list of the constituent + probe intervals. """ if not intervals: return intervals @@ -202,12 +379,37 @@ def joinNimblegenIntervals(intervals,start='start',end='end',offset=1000): return non_overlapping def probeScores(probes): - """Returns list of scores across all a list of probes""" + """Extract scores from a list of probe intervals into a numpy array. + + Args: + probes: A list of ChipInterval (or any object with a ``score`` + attribute) objects. + + Returns: + A numpy array of dtype float32 containing the score of each probe + in the same order as the input list. + """ return np.array([x.score for x in probes],dtype='f') def getRandomDist(probes,nRandom,windowSize): - """Returns a numpy array of length 'nRandom' corresponding to the max values of sliding windows of size 'windowSize' - from shuffled probe data. + """Build an empirical null distribution of maximum sliding-window means. 
+ + Repeatedly shuffles the probe score array in place, slides a window of + ``windowSize`` across it, records the maximum window mean for each + shuffle, and returns all maxima as a numpy array. This distribution is + used to compute empirical p-values for observed window means. + + Args: + probes: A numpy array (or list) of numeric probe scores. The array + is shuffled in place during this function — pass a copy if the + original order must be preserved. + nRandom: Number of shuffle iterations (i.e. length of the returned + distribution array). + windowSize: Number of consecutive probes in each sliding window. + + Returns: + A numpy array of dtype float32 with ``nRandom`` elements, where each + element is the maximum window mean observed in one shuffle iteration. """ sys.stderr.write("Getting %d Max value distributions from windows of size %d:\n" % (nRandom,windowSize)) #scores = probeScores(probes) @@ -226,11 +428,33 @@ def getRandomDist(probes,nRandom,windowSize): return maxVals def calcPVals(segScores,permuted,windowSize): - """This does not work yet""" + """Count permuted values at least as extreme as the observed score. + + Note: This function is not yet correctly implemented. + + Args: + segScores: The observed test statistic (scalar or array) to compare + against the permuted distribution. + permuted: A numpy array of values from the null distribution (e.g. + as returned by ``getRandomDist``). + windowSize: Window size used to generate the distribution (not + currently used in the comparison). + + Returns: + The number of elements in ``permuted`` that are >= ``segScores``. + """ return len(permuted[permuted>=segScores]) def main(): + """Run the default ChIP-chip analysis pipeline. + + Discovers all ``.gff`` files in the current directory, loads and + normalises them via ``continuousData.SimpleChIPData``, merges adjacent + probes, and generates permutation-based null distributions for a set of + predefined window sizes (5, 7, 9, 11 probes). 
The resulting + distributions are stored in ``permuted`` keyed by window size. + """ files = glob.glob("*.gff") data = continuousData.SimpleChIPData(files) data.normalize() diff --git a/src/seqlib/GTFlib.py b/src/seqlib/GTFlib.py index 9c27dcb..690e34b 100644 --- a/src/seqlib/GTFlib.py +++ b/src/seqlib/GTFlib.py @@ -1,11 +1,16 @@ -''' -Created on Aug 31, 2010 +"""Parsing and data structures for GTF (Gene Transfer Format) files. -All of this is very fragile and is -absolutely dependent on a unique geneId and unique transcriptId for any records... +All of this is very fragile and is absolutely dependent on a unique geneId +and unique transcriptId for any records. -@author: lgoff -''' +Provides GTF_Entry, GTFTranscriptContainer, and GTFGeneContainer classes for +holding GTF data, along with iterator functions for streaming over transcripts +and genes, and utility functions for building attribute dictionaries and tables. + +Originally created on Aug 31, 2010. + +Author: lgoff +""" ########### #Imports ########### @@ -20,8 +25,13 @@ #Error Handling ####################### class Error(Exception): - """Base class for exceptions in this module.""" + """Base class for exceptions in this module. + + Provides a message property with getter/setter so subclasses can store + a human-readable error description. + """ def __str__(self): + """Return the string representation of the error message.""" return str(self.message) def _get_message(self, message): return self._message def _set_message(self, message): self._message = message @@ -42,14 +52,22 @@ def __init__(self, message): ######################### class GTF_Entry: - ''' - Holds a row's worth of GTF information. - ''' + """Holds a single row's worth of GTF/GFF information. + + Attributes: + contig: Sequence name / chromosome. + source: Annotation source name. + feature: Feature type (e.g. "exon", "CDS", "transcript"). + frame: Reading frame (".","0","1","2"). + start: 1-based start coordinate (integer). 
+ end: 1-based end coordinate (integer). + score: Score value (float or "."). + strand: Strand ("+" or "-" or "."). + attributes: Dictionary of parsed key-value attribute pairs. + """ def __init__(self): - ''' - Constructor - ''' + """Construct a GTF_Entry with default empty/sentinel field values.""" self.contig = "." self.source = "." self.feature = "." @@ -61,15 +79,23 @@ def __init__(self): self.attributes = {} def __lt__(self, b): + """Compare GTF entries by midpoint coordinate.""" return (self.start + self.end) // 2 < (b.start + b.end) // 2 def __eq__(self, b): + """Return True if two GTF entries share the same midpoint coordinate.""" return (self.start + self.end) // 2 == (b.start + b.end) // 2 def __repr__(self): + """Return a transcript_id:feature string representation.""" return self.attributes['transcript_id']+":"+self.feature def addGTF_Entry(self,gtf_entry): + """Copy all fields from another GTF_Entry into self. + + Args: + gtf_entry: A GTF_Entry instance whose fields will be copied. + """ self.contig = gtf_entry.contig self.source = gtf_entry.source self.feature = gtf_entry.feature @@ -133,6 +159,14 @@ def parseInfo(self,myAttributes,line ): self.attributes[n] = v def toGTF(self): + """Serialize this entry back to a GTF-formatted string. + + Writes gene_id and transcript_id first (as required by the GTF spec), + then all remaining attributes in arbitrary order. + + Returns: + A GTF-formatted line string ending with a newline. + """ tmp = '%s\t%s\t%s\t%d\t%d\t%s\t%s\t%s\t' % (self.contig,self.source,self.feature,self.start,self.end,str(self.score),self.strand,self.frame) #Print 'gene_id' and 'transcript_id' as first and second attributes (required by GTF spec.) for attr in ['gene_id','transcript_id']: @@ -151,10 +185,20 @@ def toGTF(self): #GTFTranscriptContainer ############ class GTFTranscriptContainer(object): + """Container grouping all GTF_Entry instances sharing a transcript_id. 
+ + Attributes: + features: List of GTF_Entry objects belonging to this transcript. + start: Minimum start coordinate across all features. + end: Maximum end coordinate across all features. + contig: Chromosome/contig name. + strand: Strand orientation. + transcriptId: transcript_id attribute value. + geneId: gene_id attribute value. + """ + def __init__(self): - ''' - Constructor - ''' + """Construct an empty GTFTranscriptContainer with sentinel values.""" self.features = [] self.start = -1 self.end = -1 @@ -164,15 +208,30 @@ def __init__(self): self.geneId = '' def __len__(self): + """Return the genomic span of the transcript (end - start + 1).""" return self.end-self.start+1 def __lt__(self, b): + """Compare transcript containers by midpoint coordinate.""" return (self.start + self.end) // 2 < (b.start + b.end) // 2 def __eq__(self, b): + """Return True if two transcript containers share the same midpoint.""" return (self.start + self.end) // 2 == (b.start + b.end) // 2 def addFeature(self,gtf_entry): + """Add a GTF_Entry to this transcript container. + + Initialises contig, strand, and transcriptId from the first feature + added. Asserts that subsequent features share the same transcript_id. + Updates self.start and self.end to span all features. + + Args: + gtf_entry: A GTF_Entry instance to add. + + Raises: + AssertionError: If gtf_entry has a different transcript_id. + """ if self.transcriptId == '': self.contig = gtf_entry.contig self.strand = gtf_entry.strand @@ -184,10 +243,23 @@ def addFeature(self,gtf_entry): self.update() def update(self): + """Recompute self.start and self.end from the current feature list.""" self.start = min([x.start for x in self.features]) self.end = max([x.end for x in self.features]) def toSplicedInterval(self): + """Convert this transcript container to a SplicedInterval. + + Extracts exon features, sorts them by exon_number, and constructs a + SplicedInterval using their lengths and offsets. 
+ + Returns: + A SplicedInterval representing the spliced transcript. + + Raises: + ValueError: If more than one distinct transcript_id is found + in the feature list. + """ transcripts = uniqify([x.attributes['transcript_id'] for x in self.features]) if len(transcripts) > 1: raise ValueError ("Something is wrong, there are too many different transcript_ids") @@ -204,18 +276,27 @@ def toSplicedInterval(self): ############ class GTFGeneContainer(object): - ''' - Container for all GTF_Entry instances with a common geneId + """Container for all GTF_Entry instances sharing a common gene_id. + Assumptions: - - gene_id field is unique to a gene locus (ie. not shared amongst gene duplicates - - There is no guarantee that the order of rows is preserved during reading in and returning GTF + - The gene_id field is unique to a gene locus (not shared among + gene duplicates). + - There is no guarantee that the row order is preserved during + reading or when returning GTF output. - ''' + Attributes: + features: List of GTF_Entry objects for this gene. + transcripts: List of GTFTranscriptContainer objects for this gene. + start: Minimum start coordinate across all features/transcripts. + end: Maximum end coordinate across all features/transcripts. + contig: Chromosome/contig name. + strand: Strand orientation. + geneId: gene_id attribute value. + sequence: DNA sequence string (empty by default). 
+ """ def __init__(self): - ''' - Constructor - ''' + """Construct an empty GTFGeneContainer with sentinel values.""" self.features = [] self.transcripts = [] self.start = -1 @@ -226,15 +307,30 @@ def __init__(self): self.sequence = '' def __len__(self): + """Return the genomic span of the gene (end - start + 1).""" return self.end-self.start+1 def __lt__(self, b): + """Compare gene containers by midpoint coordinate.""" return (self.start + self.end) // 2 < (b.start + b.end) // 2 def __eq__(self, b): + """Return True if two gene containers share the same midpoint.""" return (self.start + self.end) // 2 == (b.start + b.end) // 2 def addFeature(self,gtf_entry): + """Add a GTF_Entry feature to this gene container. + + Initialises contig, strand, and geneId from the first feature added. + Asserts that subsequent features share the same gene_id. Updates + self.start and self.end. + + Args: + gtf_entry: A GTF_Entry instance to add. + + Raises: + AssertionError: If gtf_entry has a different gene_id. + """ if self.geneId == '': self.contig = gtf_entry.contig self.strand = gtf_entry.strand @@ -244,6 +340,18 @@ def addFeature(self,gtf_entry): self.update() def addGTFTranscript(self,gtf_transcript): + """Add a GTFTranscriptContainer to this gene container. + + Initialises contig, strand, and geneId from the first transcript added. + Asserts that subsequent transcripts share the same geneId, contig, and + strand. Updates self.start and self.end via transcriptUpdate(). + + Args: + gtf_transcript: A GTFTranscriptContainer instance to add. + + Raises: + AssertionError: If geneId, contig, or strand do not match. 
+ """ if self.geneId == '': self.contig = gtf_transcript.contig self.strand = gtf_transcript.strand @@ -253,21 +361,34 @@ def addGTFTranscript(self,gtf_transcript): self.transcriptUpdate() def update(self): + """Recompute self.start and self.end from the current features list.""" self.start = min([x.start for x in self.features]) self.end = max([x.end for x in self.features]) def transcriptUpdate(self): + """Recompute self.start and self.end from the transcripts list.""" self.start = min([x.start for x in self.transcripts]) self.end = max([x.end for x in self.transcripts]) def propogateLincName(self,lincName): + """Set the linc_name attribute on all features, and gene_name if absent. + + Args: + lincName: The lincRNA name string to propagate to all features. + """ for feat in self.features: feat.attributes['linc_name'] = lincName if 'gene_name' not in feat.attributes: feat.attributes['gene_name'] = lincName def addAttribute(self,key,value): + """Add or overwrite an attribute key-value pair on all features. + + Args: + key: Attribute name string. + value: Attribute value to assign. + """ for feat in self.features: feat.attributes[key] = value @@ -277,15 +398,27 @@ def geneToBed(self): return "%s\t%d\t%d\t%s\t0\t%s\t%s\t%s" % (self.contig,self.start,self.end,self.attributes['transcript_id'],self.strand,",".join(self.exonLengths),",".join(self.exonOffsets)) def transcriptsToBed(self): + """Placeholder for BED output of transcripts (not yet implemented).""" pass def getGTF(self): + """Return a GTF string containing all features of this gene. + + Returns: + Multi-line string of GTF-formatted rows for every feature. + """ tmp = '' for feat in self.features: tmp += feat.toGTF() return tmp def toInterval(self): + """Convert this gene to an Interval spanning its genomic footprint. + + Returns: + An Interval with 0-based start (start-1), end, strand, and the + gene_id as its name. 
+ """ return intervallib.Interval(self.contig,self.start-1,self.end,self.strand,name=self.geneId) # def fetchSequence(self,genome='hg19',connection=None): @@ -303,6 +436,17 @@ def toInterval(self): #lineIterator ############# def lineIterator(gtfHandle): + """Yield GTF_Entry objects for every non-comment line in gtfHandle. + + Skips lines starting with "#". Parses each remaining line into a + GTF_Entry via GTF_Entry.read(). + + Args: + gtfHandle: An open file handle to a GTF file. + + Yields: + GTF_Entry objects, one per data line. + """ while True: line = gtfHandle.readline() if not line: return @@ -312,6 +456,18 @@ def lineIterator(gtfHandle): yield gtf_entry def GTFGeneIterator(gtfFile,verbose = False): + """Iterate over genes in a GTF file, yielding one GTFGeneContainer per gene. + + Groups all GTF_Entry rows by gene_id and yields a fully-populated + GTFGeneContainer for each unique gene_id found. + + Args: + gtfFile: Path to the GTF file. + verbose: If True, write progress messages to stderr (default False). + + Yields: + GTFGeneContainer objects, one per unique gene_id. + """ handle = open(gtfFile,'r') iter = lineIterator(handle) res = {} @@ -324,6 +480,18 @@ def GTFGeneIterator(gtfFile,verbose = False): yield res[k] def GTFGeneIterator2(gtfFile,verbose=False): + """Iterate over genes by grouping transcripts, yielding one GTFGeneContainer per gene. + + An alternative to GTFGeneIterator that builds genes from + GTFTranscriptContainer objects rather than raw GTF_Entry rows. + + Args: + gtfFile: Path to the GTF file. + verbose: If True, write progress messages to stderr (default False). + + Yields: + GTFGeneContainer objects, one per unique gene_id. + """ iter = GTFTranscriptIterator(gtfFile,verbose=verbose) res = {} for i in iter: @@ -333,6 +501,18 @@ def GTFGeneIterator2(gtfFile,verbose=False): yield res[k] def GTFTranscriptIterator(gtfFile,verbose = False): + """Iterate over transcripts in a GTF file, yielding one GTFTranscriptContainer per transcript. 
+ + Groups all GTF_Entry rows by transcript_id and yields a fully-populated + GTFTranscriptContainer for each unique transcript_id found. + + Args: + gtfFile: Path to the GTF file. + verbose: If True, write progress messages to stderr (default False). + + Yields: + GTFTranscriptContainer objects, one per unique transcript_id. + """ handle = open(gtfFile,'r') iter = lineIterator(handle) res = {} @@ -394,12 +574,15 @@ def GTFAttributeTable(gtfFile,outfile,idField='gene_id'): return def test(): - """ -from RNASeq import GTFlib -fname = 'linc_catalog.gtf' -iter = GTFlib.GTFGeneIterator(fname) -for i in iter: - print i.getGTF(), + """Placeholder test function. No-op. + + Example usage (Python 2 style, for reference):: + + from RNASeq import GTFlib + fname = 'linc_catalog.gtf' + iter = GTFlib.GTFGeneIterator(fname) + for i in iter: + print i.getGTF(), """ pass diff --git a/src/seqlib/JensenShannon.py b/src/seqlib/JensenShannon.py index f6bf249..2ca643a 100644 --- a/src/seqlib/JensenShannon.py +++ b/src/seqlib/JensenShannon.py @@ -1,10 +1,19 @@ #!/usr/bin/env python -""" -JensenShannon.py +"""Jensen-Shannon divergence utilities for comparing probability distributions. + +Provides functions to compute the Jensen-Shannon (JS) divergence between pairs +of discrete probability distributions and to construct pairwise JS divergence +matrices from a collection of distributions. The JS divergence is a +symmetrised, smoothed version of the Kullback-Leibler (KL) divergence and is +defined as:: + + JS(A || B) = 0.5 * KL(A || M) + 0.5 * KL(B || M), where M = (A + B) / 2 -Created by Loyal Goff on Nov 10, 2010. -Copyright (c) 2010 +Because it is bounded in [0, ln 2] (or [0, 1] in bits), its square root is a +proper metric known as the Jensen-Shannon distance. + +Originally created by Loyal Goff on Nov 10, 2010. """ import rpy2.robjects as r @@ -15,6 +24,26 @@ #efficnent js_div def js_div_matrix(a): + """Compute a pairwise Jensen-Shannon divergence matrix efficiently. 
+
+    For each pair of rows ``i`` and ``j`` in ``a``, computes::
+
+        JS(i, j) = H(M) - 0.5*(H(i) + H(j))
+
+    where ``M = (a[i] + a[j]) / 2`` and ``H`` denotes Shannon entropy.
+    The implementation vectorises the pairwise computation row-by-row,
+    so only the O(n) outer loop over rows runs in Python code.
+
+    Args:
+        a: A 2-D array-like of shape ``(n, d)`` where each row is a
+            probability distribution over ``d`` categories (rows should
+            sum to 1 for the result to be a true JS divergence).
+
+    Returns:
+        A symmetric ``(n, n)`` NumPy array ``W`` where ``W[i, j]`` is the
+        Jensen-Shannon divergence between rows ``i`` and ``j`` of ``a``.
+        Diagonal entries are 0.
+    """
     a=array(a)
     W=zeros((a.shape[0],a.shape[0]))
     e=-entropy(a.transpose())
@@ -27,6 +56,19 @@ def js_div_matrix(a):
     return W
 
 def make_probs(a):
+    """Normalise each row of a 2-D array to sum to 1.
+
+    Divides each row of ``a`` by its sum, converting raw counts or
+    unnormalised weights into proper probability distributions.
+
+    Args:
+        a: A 2-D NumPy array of shape ``(n, d)`` with non-negative
+            entries. Each row must have a positive sum.
+
+    Returns:
+        A 2-D NumPy array of the same shape as ``a`` where each row
+        sums to 1.0.
+    """
     sums = sum(a,1)
     res = zeros(a.shape)
     for i in range(a.shape[0]):
@@ -34,13 +76,56 @@ def make_probs(a):
     return res
 
 def js_div(A,B):
+    """Compute the Jensen-Shannon divergence between two distributions.
+
+    The JS divergence is defined as::
+
+        JS(A || B) = 0.5 * KL(A || M) + 0.5 * KL(B || M)
+
+    where ``M = (A + B) / 2`` is the mixture distribution and
+    ``KL(P || Q) = sum(P * log(P / Q))``. The result is symmetric and
+    always non-negative.
+
+    Args:
+        A: A 1-D array-like representing the first probability
+            distribution. All entries should be positive for a
+            well-defined result.
+        B: A 1-D array-like representing the second probability
+            distribution of the same length as ``A``.
+ + Returns: + A scalar float equal to the Jensen-Shannon divergence between + ``A`` and ``B``. + """ half=(A+B)/2 return 0.5*kl_div(A,half)+0.5*kl_div(B,half) def kl_div(A,B): + """Compute the Kullback-Leibler divergence of distribution A from B. + + Calculates the KL divergence using the formula:: + + KL(A || B) = sum(A * log(A / B)) + + where the sum is taken element-wise. The result is non-negative and + equals zero only when ``A`` and ``B`` are identical. Note that the + KL divergence is not symmetric: ``kl_div(A, B) != kl_div(B, A)`` + in general. + + Args: + A: A 1-D array-like representing the first (reference) + probability distribution. All entries should be positive. + B: A 1-D array-like representing the second probability + distribution of the same length as ``A``. All entries + should be positive to avoid division by zero. + + Returns: + A scalar float equal to the KL divergence KL(A || B). + """ return sum(multiply(A,log(A/B))) def main(): + """Entry point placeholder; no operation is performed.""" pass if __name__ == "__main__": diff --git a/src/seqlib/LSFlib.py b/src/seqlib/LSFlib.py index 5fc684d..6b86d74 100644 --- a/src/seqlib/LSFlib.py +++ b/src/seqlib/LSFlib.py @@ -1,8 +1,13 @@ -''' -Created on Jun 29, 2011 +"""Utilities for submitting and monitoring jobs on an IBM Platform LSF cluster. -@author: lgoff -''' +Provides the LSFJob class for constructing, submitting, polling, killing, and +waiting on LSF batch jobs via the bsub/bjobs/bkill command-line tools. Also +supports a 'local' pseudo-queue for running commands directly on the current +host without LSF. + +Designed for use with Harvard's Odyssey LSF cluster but applicable to any +Platform LSF installation. +""" import os import re import subprocess @@ -19,26 +24,73 @@ #Error Handling ####################### class LSFError(Exception): - """Base class for exceptions in this module.""" + """Exception raised for LSF-related errors. 
+ + Attributes: + value: String or object describing the error condition. + """ def __init__(self,value): + """Initialises an LSFError with an error value. + + Args: + value: A string or object describing the LSF error. + """ self.value = value + def __str__(self): + """Returns a string representation of the error value.""" return repr(self.value) ################# #Base Class ################# class LSFJob(object): - ''' - LSF Job - ''' - + """Represents a single LSF batch job with lifecycle management. + + Constructs the bsub command string, submits the job to LSF (or runs it + locally), and provides methods to poll job status, wait for completion, + and kill the job. + + Attributes: + cmd_str: The shell command to execute. + queue: LSF queue name (or 'local' for local execution). + outfile: Path to the stdout capture file. + errfile: Path to the stderr capture file. + job_name: Optional LSF job name. + group: Optional LSF job group. + job_mem: Memory requirement in GB (capped at lsf_mem global). + submit_flag: True after the job has been submitted. + complete: True after the job has finished. + status: Current job status string (e.g. 'PEND', 'RUN', 'DONE'). + jobID: LSF job ID integer (-999 before submission). + submit_time: Submission timestamp from bjobs. + exec_host: Host on which the job is/was running. + submit_host: Host from which the job was submitted. + bsub_str: List of tokens forming the complete bsub command. + """ def __init__(self,cmd_str,job_name=None,job_group=None,blocking=False,outfilename=None,errfilename=None,queue_name=None,job_mem=None,job_cores=1,notify=None): - ''' - Creates instance of LSFJob - #Don't use blocking because this is a limiting resource on Odyssey LSF - ''' + """Creates an LSFJob instance and constructs the bsub command. + + Args: + cmd_str: The shell command string to submit as an LSF job. + job_name: Optional LSF job name passed to bsub -J. + job_group: Optional LSF job group passed to bsub -g. 
+ blocking: If True, add -K flag to bsub to block until job + completes. Avoid on Odyssey LSF (limiting resource). + outfilename: Path for stdout redirection. If None, a temporary + file in 'tmp/' is created. + errfilename: Path for stderr redirection. If None, a temporary + file in 'tmp/' is created. + queue_name: LSF queue name. Defaults to lsf_default_queue. + Use 'local' to run without LSF. + job_mem: Memory requirement in GB. Capped at the module-level + lsf_mem constant. + job_cores: Number of cores requested (stored but not currently + used in the bsub command). + notify: If truthy, add -N flag to bsub to send email notification + on job completion. + """ self.cmd_str = cmd_str global lsf_default_queue @@ -108,12 +160,26 @@ def __init__(self,cmd_str,job_name=None,job_group=None,blocking=False,outfilenam self.bsub_str.insert(0,self.cmd_str) def __repr__(self): + """Returns a verbose string representation including all attributes.""" return "Instance of class LSF Job:\n\t%s\n\tSubmitted: %s\n\t Complete: %s\n" % (self.cmd_str,self.submit_flag,self.complete) + str(self.__dict__) def __str__(self): + """Returns the complete bsub command as a space-joined string.""" return " ".join(self.bsub_str) def submit(self): # wait pend + """Submits the job to LSF (or runs it locally) and waits for it to enter a stable state. + + For LSF jobs, uses subprocess.Popen to call bsub, retrieves the job ID, + and polls until the status transitions out of 'SUBMITTED'. For local + jobs, launches the process and returns immediately. + + Returns: + 0 on successful submission (or 0 for local job launch). + + Raises: + LSFError: If the bsub command returns a non-zero exit code. + """ if self.submit_flag == True: print("Job already submitted", file=sys.stderr) return 0# what do you return here? @@ -205,6 +271,12 @@ def poll(self): raise LSFError("Problem with bjobs polling. Error %s" % tmp_err) def getJobId(self): + """Parses the LSF job ID from the bsub submission output. 
+
+        Extracts the integer job ID from the '<jobID>' pattern in
+        self.submit_status and stores it in self.jobID. Prints a message to
+        stdout if the job has not been submitted yet.
+        """
         if self.submit_flag:
             jobID_search = re.search(r"\<[0-9]+\>",self.submit_status)
             self.jobID = int(jobID_search.group().strip("><"))
@@ -214,6 +286,12 @@ def getJobId(self):
         return
 
     def kill(self):
+        """Kills the LSF job using bkill.
+
+        Does nothing if the job has not been submitted or has no valid job ID.
+        Loops until bkill returns 0, retrying if necessary. On success, resets
+        status to 'NOT SUBMITTED' and clears submit_flag and complete.
+        """
         #Added this to fix cases were kill fails because there is no job id
         if self.status in ['NOT SUBMITTED'] or self.jobID== -999 :
             self.status = 'NOT SUBMITTED'
@@ -231,6 +309,13 @@ def kill(self):
         return
 
     def wait(self):
+        """Blocks until the LSF job reaches a terminal state.
+
+        Polls the job status every 30 seconds until status is no longer
+        'SUBMITTED', 'PEND', 'RUN', or 'SUSP'. Prints a warning to stderr
+        if the job is suspended. Sets status to 'DONE' and complete to True
+        on exit.
+        """
         self.poll()
         if not self.submit_flag:
             print("Job not yet submitted")
@@ -249,6 +334,18 @@ def wait(self):
 #Helper functions
 ##############
 def tmp_name(prefix):
+    """Generates a unique temporary file path inside a local 'tmp/' directory.
+
+    Creates the 'tmp/' directory in the current working directory if it does
+    not already exist, then returns a path of the form
+    'tmp/<prefix><unique suffix>'.
+
+    Args:
+        prefix: String prefix for the temporary file name.
+
+    Returns:
+        A string file path for a temporary file that does not yet exist.
+    """
     import tempfile
     tmp_root = "tmp/"
     if os.path.exists(tmp_root):
diff --git a/src/seqlib/QCtools.py b/src/seqlib/QCtools.py
index 7655d3a..968858f 100644
--- a/src/seqlib/QCtools.py
+++ b/src/seqlib/QCtools.py
@@ -1,5 +1,10 @@
 #!/usr/bin/env python
 '''
+Quality control tools for sequencing data.
+ +Provides a FASTQ file parser and a position-weight matrix (PWM) builder for +inspecting base-composition biases across read positions. + Created on May 6, 2010 @author: lgoff @@ -9,6 +14,27 @@ def makePWM(fastqFile,readLen,freq=True): + """Build a position-weight matrix of base composition from a FASTQ file. + + Iterates over all records in a FASTQ file and tallies the occurrence of + each nucleotide (A, C, G, T) at every position across ``readLen`` + positions. Ambiguous bases (e.g. 'N') are silently ignored. + Optionally converts raw counts to per-position frequencies. + + Args: + fastqFile: Path to the FASTQ file to process. + readLen: Expected read length (number of positions to track). + freq: If True (default), each base count vector is divided by the + total count at that position to produce a frequency. If False, + raw counts are returned. + + Returns: + A dict with keys 'A', 'C', 'G', 'T', and 'Total'. Each key maps to + a numpy array of length ``readLen``. The 'Total' array contains the + total number of valid base observations at each position; the + individual base arrays contain either counts or frequencies depending + on the ``freq`` argument. + """ bases = ['A','C','G','T'] pwm = { 'A':np.zeros(readLen), @@ -37,6 +63,27 @@ def makePWM(fastqFile,readLen,freq=True): #Parsers ################ def FastqIterator(fastqFile): + """Iterate over records in a FASTQ file. + + Skips any non-FASTQ header text at the start of the file (lines that do + not begin with '@') and then yields one dict per record. The file is + expected to use standard four-line FASTQ format: a '@'-prefixed name + line, a sequence line, a '+' line, and a quality line. + + Args: + fastqFile: Path to the FASTQ file to parse. + + Yields: + A dict with keys: + ``'name'``: Read name string (the '@' prefix is stripped). + ``'sequence'``: Nucleotide sequence string. + ``'quals'``: ASCII quality string. + + Raises: + ValueError: If a record's name line does not start with '@'. 
+ ValueError: If the separator line between sequence and qualities + does not start with '+'. + """ handle = open(fastqFile,'r') #Skip any header text while True: diff --git a/src/seqlib/RIPDiff.py b/src/seqlib/RIPDiff.py index 210f3ee..730be66 100644 --- a/src/seqlib/RIPDiff.py +++ b/src/seqlib/RIPDiff.py @@ -1,11 +1,15 @@ -''' -Created on May 13, 2010 +"""Framework for RIP-Seq differential enrichment analysis. -Normalizes and compares RIP vs Control (IgG or total RNA) to identify segments of transcripts that are -preferrentially enriched in RIP +Provides skeletal classes and functions for comparing RNA Immunoprecipitation +(RIP) sequencing data against an isotype control (IgG) or total RNA input to +identify transcript segments preferentially enriched in the RIP sample. -@author: lgoff -''' +RIP-Seq (RNA Immunoprecipitation followed by Sequencing) is used to identify +RNA molecules bound by a specific RNA-binding protein. + +Note: This module is largely unimplemented (placeholder pass statements) and +is retained as a design scaffold for future development. +""" ################## #Imports ################## @@ -16,28 +20,53 @@ ################## class RIPUnit(intervallib.Interval): - """ - Can be individual transcript or some basic unit being interrogated for differential peaks (ie. chromosome) - Extends intervallib.Interval class + """A genomic interval unit used as the basic unit of RIP-Seq differential analysis. + + Can represent an individual transcript or any other genomic region (e.g. a + whole chromosome) that is to be tested for differential read enrichment + between a RIP sample and its control. Extends intervallib.Interval. + + Note: All methods are currently unimplemented placeholders. """ def __init__(self,interval): - """Initiate from existing instance of Interval class only""" + """Initialises a RIPUnit from an existing Interval instance. + + Args: + interval: An intervallib.Interval object to copy coordinates from. 
+ + Raises: + AssertionError: If interval is not an instance of + intervallib.Interval. + """ assert isinstance(interval,intervallib.Interval) intervallib.Interval.__init__(interval) def scan(self): + """Scans the interval for differential RIP peaks (not implemented).""" pass def makebins(self,binSize): + """Divides the interval into bins of the given size (not implemented). + + Args: + binSize: Size of each bin in base pairs. + """ pass def binBinom(self): + """Applies a binomial test to each bin (not implemented).""" pass def binPois(self): + """Applies a Poisson test to each bin (not implemented).""" pass def fetchReads(self,bamHandle): + """Fetches aligned reads overlapping this interval from a BAM file (not implemented). + + Args: + bamHandle: A pysam AlignmentFile handle. + """ pass @@ -45,7 +74,21 @@ def fetchReads(self,bamHandle): #Functions ################# def globalNorm(ripUnit,totReads): + """Applies global normalisation to a RIPUnit based on total library size (not implemented). + + Args: + ripUnit: A RIPUnit object representing the region to normalise. + totReads: Total number of mapped reads in the library, used as the + normalisation denominator. + """ pass def localNorm(ripUnitA,ripUnitB): + """Applies local normalisation between two RIPUnit objects (not implemented). + + Args: + ripUnitA: A RIPUnit from the experimental (RIP) sample. + ripUnitB: A RIPUnit from the control (IgG or input) sample for the + same genomic region. + """ pass diff --git a/src/seqlib/algorithms.py b/src/seqlib/algorithms.py index 2184c51..bb3ac96 100644 --- a/src/seqlib/algorithms.py +++ b/src/seqlib/algorithms.py @@ -1,30 +1,61 @@ # python libs +"""Algorithmic data structures and search utilities for sequence analysis. +Provides Union-Find disjoint set, QuadTree spatial indexing, and binary search +implementations used throughout the seqlib package. 
+""" #============================================================================= class UnionFind: - """An implementation of the UNINON/FIND algorithm""" + """An implementation of the UNION/FIND algorithm for disjoint sets. + + Supports efficient union and membership queries using path compression. + Each UnionFind instance represents a single set; sets can be merged via + union() and queried for shared membership via same(). + """ def __init__(self, items): + """Initialize a new UnionFind set containing the given items. + + Args: + items: An iterable of hashable items to populate the initial set. + """ self.parent = None self.items = dict.fromkeys(items, 1) def __contains__(self): + """Return True if item is a member of the root set.""" return item in self.root().items def __len__(self): + """Return the number of items in the root set.""" return len(self.root().items) def __iter__(self): + """Iterate over the items in the root set.""" return iter(self.root().items) def add(self, item): + """Add an item to the root set. + + Args: + item: A hashable item to add to the set. + """ self.root().items[item] = 1 def root(self): + """Return the root UnionFind node for this set, applying path compression. + + Traverses parent pointers to find the canonical representative of the + set. As a side effect, compresses the path by pointing this node + directly at the root. + + Returns: + The root UnionFind node representing this disjoint set. + """ node = self while node.parent: node = node.parent @@ -33,9 +64,26 @@ def root(self): return node def same(self, other): + """Return True if this set and other share the same root (are in the same set). + + Args: + other: Another UnionFind instance to compare against. + + Returns: + True if both instances belong to the same disjoint set, False otherwise. + """ return self.root() == other.root() def union(self, other): + """Merge this set with other so that all members belong to a single set. 
+ + If both sets already share the same root, this is a no-op. Otherwise, + all items from other's root are merged into this set's root, and + other's root is reparented. + + Args: + other: Another UnionFind instance to merge with this set. + """ root1 = self.root() root2 = other.root() if root1 == root2: @@ -46,6 +94,11 @@ def union(self, other): root2.parent = root1 def members(self): + """Return a view of all items belonging to this set. + + Returns: + A dict_keys view of all items in the root set. + """ return self.root().items.keys() @@ -64,9 +117,22 @@ def size(self): # QuadTree data structure class Rect: - """A representation of a rectangle""" + """A representation of an axis-aligned rectangle. + + Stores the bounding box as (x1, y1) lower-left and (x2, y2) upper-right + corners, normalizing the coordinates so that x1 <= x2 and y1 <= y2 + regardless of the order the arguments are supplied. + """ def __init__(self, x1, y1, x2, y2): + """Initialize a Rect, normalizing so that (x1, y1) is the lower-left corner. + + Args: + x1: X coordinate of one horizontal boundary. + y1: Y coordinate of one vertical boundary. + x2: X coordinate of the other horizontal boundary. + y2: Y coordinate of the other vertical boundary. + """ if x1 < x2: self.x1 = x1 self.x2 = x2 @@ -81,19 +147,51 @@ def __init__(self, x1, y1, x2, y2): self.y2 = y1 class QuadNode: + """A single entry stored in a QuadTree leaf node. + + Associates an arbitrary item with the bounding Rect used for spatial + indexing inside the QuadTree. + """ + item = None rect = None def __init__(self, item, rect): + """Initialize a QuadNode with an item and its bounding rectangle. + + Args: + item: The object to store (any type). + rect: A Rect instance representing the spatial extent of item. + """ self.item = item self.rect = rect class QuadTree: + """A spatial index that partitions 2-D space into four quadrants recursively. + + Items are stored alongside their bounding Rect. 
When a leaf node exceeds + MAX items and has not yet reached MAX_DEPTH, it is split into four child + QuadTree nodes and its items are redistributed. Items whose bounding + rectangles span multiple quadrants are stored in every overlapping child. + + Class attributes: + MAX: Maximum number of items in a leaf before splitting (default 10). + MAX_DEPTH: Maximum recursion depth allowed for splits (default 10). + """ + MAX = 10 MAX_DEPTH = 10 def __init__(self, x, y, size, depth = 0): + """Initialize a QuadTree node centered at (x, y) with a given half-size. + + Args: + x: X coordinate of this node's center. + y: Y coordinate of this node's center. + size: Half-width (and half-height) of the region covered by this node. + depth: Current depth of this node in the tree (0 for the root). + """ self.nodes = [] self.children = [] self.center = [x, y] @@ -101,6 +199,17 @@ def __init__(self, x, y, size, depth = 0): self.depth = depth def insert(self, item, rect): + """Insert an item with the given bounding rectangle into the tree. + + If this node is a leaf, the item is appended to the local node list. + If the leaf then exceeds MAX items and depth allows, the node is split. + If this node already has children, the item is forwarded to the + appropriate child(ren). + + Args: + item: The object to store. + rect: A Rect instance representing the spatial extent of item. + """ if len(self.children) == 0: self.nodes.append(QuadNode(item, rect)) @@ -110,6 +219,15 @@ def insert(self, item, rect): self.insertIntoChildren(item, rect) def insertIntoChildren(self, item, rect): + """Forward an item into every child quadrant that its bounding rect overlaps. + + The four children are ordered: [bottom-left, top-left, bottom-right, + top-right] relative to the center of this node. + + Args: + item: The object to store. + rect: A Rect instance representing the spatial extent of item. 
+ """ if rect.x1 < self.center[0]: if rect.y1 < self.center[1]: self.children[0].insert(item, rect) @@ -122,6 +240,12 @@ def insertIntoChildren(self, item, rect): self.children[3].insert(item, rect) def split(self): + """Split this leaf node into four child QuadTree nodes. + + Creates four children covering the four quadrants of this node's + region, then redistributes all currently held items into the children. + After splitting, the local node list is cleared. + """ self.children = [QuadTree(self.center[0] - self.size/2, self.center[1] - self.size/2, self.size/2, self.depth + 1), @@ -140,6 +264,24 @@ def split(self): self.nodes = [] def query(self, rect, results = {}, ret = True): + """Return all items whose bounding rectangles overlap the query rect. + + Recursively traverses child nodes that overlap rect. At leaf nodes, + items whose stored Rect intersects rect are added to the result set. + The results dict is used for deduplication (items are keys). + + Args: + rect: A Rect instance defining the query region. + results: A dict used internally to accumulate results across + recursive calls. Callers should not pass this argument. + ret: If True (the default for the top-level call), the method + returns the keys of the results dict. Recursive calls pass + False to suppress the return. + + Returns: + A dict_keys view of all items that overlap rect, or None when + called recursively (ret=False). + """ if ret: results = {} @@ -164,6 +306,12 @@ def query(self, rect, results = {}, ret = True): return results.keys() def getSize(self): + """Return the total number of items stored in this node and all descendants. + + Returns: + An integer count of all QuadNode items held in the subtree rooted + at this node. 
+ """ size = 0 for child in self.children: size += child.getSize() @@ -174,15 +322,27 @@ def getSize(self): # TODO: make a funtion based linear search def binsearch(lst, val, compare=None, order=1): - """Performs binary search for val in lst using compare - - if val in lst: - Returns (i, i) where lst[i] == val - if val not in lst - Returns index i,j where - lst[i] < val < lst[j] - - runs in O(log n) + """Perform binary search for val in lst, returning a bracket of indices. + + Runs in O(log n). If val is found exactly, both elements of the returned + tuple are the same index. If val is not found, the tuple brackets the + position where val would be inserted. + + Args: + lst: A sorted sequence to search. + val: The value to search for. + compare: An optional two-argument callable compare(a, b) that returns + -1, 0, or 1 (like the old cmp function). Defaults to numeric + comparison via subtraction of boolean comparisons. + order: 1 for ascending sort order, -1 for descending. Defaults to 1. + + Returns: + A tuple (i, j) where: + - (i, i) if lst[i] == val (exact match). + - (i, None) if val is beyond the high end of lst. + - (None, j) if val is before the low end of lst. + - (i, j) with i < j if val falls between lst[i] and lst[j]. + - (None, None) if lst is empty. """ if compare is None: def compare(a, b): return (a > b) - (a < b) diff --git a/src/seqlib/blockIt.py b/src/seqlib/blockIt.py index 0c5f032..698442a 100644 --- a/src/seqlib/blockIt.py +++ b/src/seqlib/blockIt.py @@ -1,4 +1,10 @@ ''' +Block-iT miRNA expression vector insert design utilities. + +Given a 21-mer siRNA candidate sequence, generates the forward and reverse +oligonucleotide sequences required for cloning into the pcDNA6.2-GW/Em-GFP/miR +expression vector (Invitrogen Block-iT Kit). 
+
 Created on Oct 14, 2009
 Takes as input a 21mer sequence (candidate siRNA) and creates the appropriate fwd and rev oligo sequences to order for insertion into the pcDNA6.2-GW/Em-GFP/miR expression vector from
@@ -15,12 +21,36 @@
 revAdapter = 'CCTG'
 
 def makeBlockItInsert(seq):
+    """Design forward and reverse oligos for a Block-iT miRNA insert.
+
+    Constructs the forward strand by concatenating the fixed forward adapter,
+    the reverse complement of seq, the loop sequence, and a modified copy of
+    seq (positions 0-7 joined directly to positions 10 onward, skipping 8-9).
+    The reverse strand is the reverse complement of the forward strand
+    (excluding the first four adapter bases), prefixed by the reverse adapter.
+
+    Args:
+        seq: A 21-nucleotide DNA string representing the candidate siRNA
+            sense sequence (5' to 3').
+
+    Returns:
+        A tuple (fwdStrand, revStrand) where both elements are DNA strings
+        suitable for ordering as oligonucleotides.
+    """
     fwdStrand = fwdAdapter+sequence.reverse_complement(seq)+loopSequence+seq[:8]+seq[10:]
     revStrand = revAdapter+sequence.reverse_complement(fwdStrand[4:])
-    return (fwdStrand,revStrand)
+    return (fwdStrand, revStrand)
 
 def printBlockIt(seqs):
-    """Takes as input the tuple returned from makeBlockItInsert and prints the result to stdout"""
+    """Print the forward and reverse oligo sequences from a Block-iT insert tuple.
+
+    Prints each strand labeled 'FWD' or 'REV' to stdout. Also computes a
+    base-pairing alignment string between the forward and reverse strands,
+    but the resulting alignment is not printed.
+
+    Args:
+        seqs: A tuple (fwdStrand, revStrand) as returned by makeBlockItInsert.
+    """
     print("FWD:\t%s" % seqs[0])
     print("REV:\t%s" % seqs[1])
diff --git a/src/seqlib/bowtie.py b/src/seqlib/bowtie.py
index 074a40a..10a5f65 100644
--- a/src/seqlib/bowtie.py
+++ b/src/seqlib/bowtie.py
@@ -1,20 +1,25 @@
 '''
-Created on Dec 15, 2009
+Python tools for running Bowtie in colorspace mode on the Broad Institute cluster.
-Python tools for bowtie in colorspace (on Broad cluster) +Provides helpers for preparing SOLiD colorspace reads for Bowtie alignment +and for submitting alignment jobs to an LSF cluster. The pipeline is: -@author: lgoff +1. Make colorspace FASTQ files from ``.csfasta`` and ``.qual`` files + (see ``solid.py`` or ``makeFastq.py``). +2. Align reads with ``bowtie`` using the ``-C`` (colorspace) and ``-S`` + (SAM output) flags. +3. Process the resulting SAM/BAM files with the tools in ``mySam.py`` or + ``bwa.py``. + +The module-level constant ``hg18_bowtieIndex`` points to the colorspace +Bowtie index used by the original author; update it for other references. -Pipeline: -1) make cs .fastq file from .csfasta and .qual (see solid.py or use makeFastq.py in scripts) -2) Align reads in .fastq file using bowtie and specify SAM output with -S flag -3) Enjoy your alignments! +Created on Dec 15, 2009 +@author: lgoff Example commandline: bowtie -C -t -S -n 2 -k 1 -p 4 --best /seq/compbio-hp/lgoff/genomes/hg18/bowtie/hg18_c head0073_20090130_1Uppsala1_Upp1_F3_no_header.csfasta >head0073_20090130_1Uppsala1_Upp1_F3_bowtie.sam 2>bowtie.err -""" - ''' ############ #Imports @@ -35,6 +40,30 @@ ######## def prepBowtie(csfile,qualfile,shortname,basedir,split=100000,readsdir="fastq/",resultsdir="results/"): + """Prepare SOLiD colorspace reads for a Bowtie alignment run. + + Validates input file extensions, generates split FASTQ files from the + colorspace FASTA and quality files using ``solid.makeFastq``, and + creates the results output directory if it does not already exist. + + Args: + csfile: Path to the SOLiD colorspace FASTA file (must end with + ``.csfasta``). + qualfile: Path to the quality score file (must end with ``.qual``). + shortname: Base name used when naming the output FASTQ files. + basedir: Base directory for the project (currently unused in the + function body but reserved for future use). + split: Maximum number of reads per split FASTQ file. Defaults to + 100000. 
+ readsdir: Subdirectory path (relative to cwd) into which the FASTQ + files are written. Defaults to ``'fastq/'``. + resultsdir: Subdirectory path (relative to cwd) that will receive + Bowtie output. Created if absent. Defaults to ``'results/'``. + + Raises: + ValueError: If ``csfile`` does not end with ``.csfasta``. + ValueError: If ``qualfile`` does not end with ``.qual``. + """ if not csfile.endswith('.csfasta'): raise ValueError("prepBowtie requires a .csfasta file") if not qualfile.endswith('.qual'): @@ -49,6 +78,19 @@ def prepBowtie(csfile,qualfile,shortname,basedir,split=100000,readsdir="fastq/", return def runBowtie(queue="broad",cwd=os.getcwd(),outDir = "../results/"): + """Submit colorspace Bowtie alignment jobs to an LSF cluster. + + Scans ``cwd`` for files ending in ``.fastq`` and submits one LSF + ``bsub`` job per file. Each job runs Bowtie in colorspace mode + (``-C``), reporting a single best-alignment SAM file per input. + + Args: + queue: LSF queue name to submit jobs to. Defaults to ``'broad'``. + cwd: Directory to scan for ``.fastq`` files. Defaults to the + current working directory at import time. + outDir: Directory (relative or absolute) into which the SAM and + error files are written. Defaults to ``'../results/'``. + """ files = os.listdir(cwd) for file in files: if file.endswith(".fastq"): diff --git a/src/seqlib/bwa.py b/src/seqlib/bwa.py index 359b589..f999def 100644 --- a/src/seqlib/bwa.py +++ b/src/seqlib/bwa.py @@ -1,6 +1,16 @@ ''' +Python wrappers for the BWA short-read alignment algorithm. + +Provides helper functions for submitting BWA alignment jobs to an LSF +cluster (``bsub``), converting SAM output to sorted BAM files, and parsing +SAM records. Also includes utilities for converting pileup output to UCSC +wiggle format. + +The module-level ``prefix`` and ``ref_index`` constants point to the hg18 +reference genome used by the original author; update these for other +references. 
+
 Created on Jul 30, 2009
-Python wrappers for BWA algorithm
 
 @author: lgoff
 
@@ -20,19 +30,62 @@
 #=================
 
 class SAMAlignment(Alignment):
+    """SAM alignment record with CIGAR and quality-string fields.
+
+    Extends the Alignment base class with the two SAM-specific fields that
+    are not part of the generic Alignment interface.
+
+    Attributes:
+        qual: ASCII-encoded base-quality string (SAM field 11).
+        cigar: CIGAR string describing the alignment operations (SAM field 6).
+    """
+
     def __init__(self,readname,chr,start,end,strand,score,readcount,readsequence,cigar,qualstring):
+        """Initialise a SAMAlignment.
+
+        Args:
+            readname: Query template name (SAM field 1).
+            chr: Reference sequence name / chromosome (SAM field 3).
+            start: 1-based leftmost mapping position (SAM field 4).
+            end: Computed end position (start + read length - 1).
+            strand: Strand of the alignment, '+' or '-'.
+            score: Mapping quality score (SAM field 5). NOTE(review): not passed through; Alignment receives score=readcount -- confirm intent.
+            readcount: Number of reads represented (typically 1).
+            readsequence: Read sequence bases (SAM field 10).
+            cigar: CIGAR string (SAM field 6).
+            qualstring: ASCII-encoded base-quality string (SAM field 11).
+        """
         Alignment.__init__(self,readname,chr,start,end,strand,score=readcount,readcount = readcount,readsequence=readsequence)
         self.qual = qualstring
         self.cigar = cigar
 
 def SAMReader(fname):
-    """Iterator for SAMAlignment records"""
+    """Iterate over SAM alignment records from a file.
+
+    Args:
+        fname: Path to the SAM file.
+
+    Yields:
+        An Interval object for each alignment record in the file.
+    """
     handle = open(fname,'r')
     for line in handle:
         aln = parseSAMString(line)
         yield aln.toInterval()
 
 def parseSAMString(samstring):
+    """Parse a single SAM-format line into a SAMAlignment object.
+
+    The end position is derived from the start position plus the length of
+ + Args: + samstring: A single tab-delimited SAM record line (trailing whitespace + is stripped internally). + + Returns: + A SAMAlignment instance populated from the SAM fields. + """ tokens = samstring.rstrip().split("\t") readname = tokens[0] chr = tokens[2] @@ -47,9 +100,31 @@ def parseSAMString(samstring): return SAMAlignment(readname,chr,start,end,strand,score,readcount,readsequence,cigar,qualstring) def joinSAMIntervals(iter,start='start',end='end',offset=0): - """ - Returns a list of independent non-overlapping intervals for each strand overlapping by offset if set - ***SAM file must first be sorted using 'samtools sort'*** + """Merge overlapping SAM intervals into non-overlapping intervals, per strand. + + Groups intervals by strand ('+' or '-'), then iterates through each + group in order and merges any pair of intervals that intersect (with + optional extension by ``offset``). Each merged interval stores its + constituent child intervals and reports their count as ``readcount``. + + The SAM file must be sorted with ``samtools sort`` before use. + + Args: + iter: An iterable of Interval (or Alignment) objects already + loaded from a sorted SAM file. Each must have a ``strand`` + attribute of '+' or '-'. + start: Name of the start-coordinate attribute used when testing + intersection. Defaults to 'start'. + end: Name of the end-coordinate attribute used when testing + intersection. Defaults to 'end'. + offset: Number of bases by which interval extents are extended + before testing for overlap. Defaults to 0. + + Returns: + A dict with keys '+' and '-', each mapping to a list of merged + Interval objects for that strand. Each merged interval has a + ``readcount`` equal to the number of constituent child reads and + a ``children`` list of those child intervals. 
""" overlapping_plus = [] @@ -91,6 +166,18 @@ def joinSAMIntervals(iter,start='start',end='end',offset=0): return res def bwaAlignSubmit(files,mismatches=2,queue='hugemem'): + """Submit BWA alignment jobs (``bwa aln``) to an LSF cluster. + + For each input FASTQ file, constructs and submits an LSF ``bsub`` job + that runs ``bwa aln`` against the module-level ``prefix`` reference and + writes a ``.sai`` alignment index file. + + Args: + files: A list of FASTQ file paths to align. + mismatches: Maximum number of mismatches allowed in the seed region + (passed to ``bwa aln -n``). Defaults to 2. + queue: LSF queue name to submit jobs to. Defaults to 'hugemem'. + """ for fname in files: shortname = fname.rstrip(".fastq") command = "bsub -q %s -N -o /dev/null -P BWA_Align 'bwa aln -c -n %d %s %s >%s.sai 2>%s.e'" % (queue,mismatches,prefix,fname,shortname,shortname) @@ -98,6 +185,19 @@ def bwaAlignSubmit(files,mismatches=2,queue='hugemem'): return def bwaSamseSubmit(files,mismatches=2,queue='broad'): + """Submit BWA SAM conversion jobs (``bwa samse``) to an LSF cluster. + + For each ``.sai`` file, constructs and submits an LSF ``bsub`` job that + runs ``bwa samse`` to convert the alignment index back to SAM format, + writing a ``.sam`` file. Assumes a matching ``.fastq`` file exists with + the same base name. + + Args: + files: A list of ``.sai`` file paths to convert. + mismatches: Unused parameter kept for interface compatibility. + Defaults to 2. + queue: LSF queue name to submit jobs to. Defaults to 'broad'. + """ for fname in files: shortname = fname.rstrip(".sai") command = "bsub -q %s -N -o /dev/null -P BWA_Samse 'bwa samse %s %s.sai %s.fastq >%s.sam 2>%s.e'" % (queue,prefix,shortname,shortname,shortname,shortname) @@ -105,6 +205,16 @@ def bwaSamseSubmit(files,mismatches=2,queue='broad'): return def makeBam(files,queue='broad'): + """Submit SAM-to-BAM conversion jobs (``samtools view``) to an LSF cluster. 
+ + For each SAM file, constructs and submits an LSF ``bsub`` job that uses + ``samtools view`` to convert it to a BAM file indexed against the + module-level ``ref_index`` FASTA index. + + Args: + files: A list of SAM file paths to convert. + queue: LSF queue name to submit jobs to. Defaults to 'broad'. + """ for fname in files: shortname = fname.rstrip("*.sam") command = "bsub -q %s -N -o /dev/null -P SAM2BAM 'samtools view -h -bt %s -o %s.bam %s 2>%s.bam.e'" % (queue,ref_index,shortname,fname,shortname) @@ -112,6 +222,17 @@ def makeBam(files,queue='broad'): return def samSort(files,queue='broad'): + """Sort BAM files by coordinate using ``samtools sort``. + + Iterates over a list of BAM files, printing a status message for each, + and runs ``samtools sort`` locally (not via LSF) to produce a + ``*_sorted.bam`` output file. + + Args: + files: A list of BAM file paths to sort. + queue: Unused parameter kept for interface consistency with other + submit functions. Defaults to 'broad'. + """ for fname in files: shortname = fname.rstrip("*.bam")+"_sorted" command = "samtools sort %s %s" % (fname,shortname) @@ -122,6 +243,20 @@ def samSort(files,queue='broad'): def pileup2wig(fname,shortname,outDir=os.getcwd()+"/"): + """Convert a samtools pileup file to strand-specific wiggle files. + + Reads a samtools pileup output file and writes two variableStep wiggle + files: one for the plus strand (forward reads, '.' characters) and one + for the minus strand (reverse reads, ',' characters). + + Args: + fname: Path to the samtools pileup file to read. + shortname: Base name used for the wiggle track labels and the output + file names (``_plus.wig`` and + ``_minus.wig``). + outDir: Directory in which the output wiggle files are written. + Defaults to the current working directory. 
+ """ handle = open(fname,'r') preRef = '' prePos = -1 @@ -132,6 +267,17 @@ def pileup2wig(fname,shortname,outDir=os.getcwd()+"/"): minusHand = open(outDir+shortname+"_minus.wig",'w') def wigHeader(shortname,strand): + """Build a UCSC wiggle track-definition header line. + + Args: + shortname: Base name used in the track name and description. + strand: Strand of the track; '+' produces a blue track, + '-' produces a red track. + + Returns: + A wiggle track header string suitable for the first line of a + wiggle file. + """ if strand=="+": color = '0,0,255' sName = 'plus' @@ -163,14 +309,34 @@ def wigHeader(shortname,strand): def getBitValue(n, p): - ''' - get the bitvalue of denary (base 10) number n at the equivalent binary - position p (binary count starts at position 0 from the right) - ''' + """Return the bit at position p of integer n. + + Extracts a single bit at binary position p (zero-indexed from the + least-significant bit) of the integer n. + + Args: + n: A non-negative integer to inspect. + p: Zero-based bit position (0 = least-significant / rightmost bit). + + Returns: + 1 if the bit at position p is set, 0 otherwise. + """ return (n >> p) & 1 def strandFlag(flag): - """Returns strand of sequence from SAM record bitflag (field 4)""" + """Determine the alignment strand from a SAM bitflag value. + + Inspects bit 4 (0x10) of the SAM FLAG field to determine whether a read + mapped to the reverse strand. + + Args: + flag: The integer SAM FLAG value (field 2), or a string + representation of it. + + Returns: + '+' if bit 4 is 0 (forward strand), '-' if bit 4 is 1 (reverse + strand), or '*' for any other value. + """ flag = int(flag) if getBitValue(flag,4)==0: return "+" diff --git a/src/seqlib/clustering.py b/src/seqlib/clustering.py index fa8fd93..25c4bbc 100644 --- a/src/seqlib/clustering.py +++ b/src/seqlib/clustering.py @@ -1,4 +1,10 @@ ''' +K-means clustering implementation for n-dimensional point data. 
+ +Provides Point and Cluster data structures along with a K-means clustering +algorithm and Euclidean distance metric for grouping arbitrary numeric +coordinate data. + Created on Nov 26, 2010 @author: lgoff @@ -10,6 +16,15 @@ #Classes class Point: + """A point in n-dimensional space. + + Attributes: + coords: A list of numeric coordinates, one per dimension. + n: The number of dimensions (length of coords). + reference: An optional object associated with this point (e.g. an + original data record). + """ + # -- The Point class represents points in n-dimensional space # Instance variables # self.coords is a list of coordinates for this Point @@ -17,20 +32,53 @@ class Point: # self.reference is an object bound to this Point # Initialize new Points def __init__(self, coords, reference=None): + """Initialize a Point with a coordinate list and optional reference. + + Args: + coords: A list of numeric values representing the coordinates of + this point in n-dimensional space. + reference: An optional object to associate with this point. + Defaults to None. + """ self.coords = coords self.n = len(coords) self.reference = reference + # Return a string representation of this Point def __repr__(self): + """Return a string representation of the coordinate list.""" return str(self.coords) class Cluster: + """A cluster of Points in n-dimensional space used by the K-means algorithm. + + All Points in a Cluster must share the same number of dimensions. The + cluster maintains a centroid (the coordinate-wise mean of its points) + which is recalculated whenever the cluster's membership changes. + + Attributes: + points: A list of Point objects belonging to this cluster. + n: The number of dimensions of the Points in this cluster. + centroid: A Point representing the sample mean of all cluster points. 
+ """ + # -- The Cluster class represents clusters of points in n-dimensional space # Instance variables # self.points is a list of Points associated with this Cluster # self.n is the number of dimensions this Cluster's Points live in # self.centroid is the sample mean Point of this Cluster def __init__(self, points): + """Initialize a Cluster from a non-empty list of same-dimensional Points. + + Args: + points: A non-empty list of Point objects, all with the same + number of dimensions. + + Raises: + Exception: If points is empty ('ILLEGAL: EMPTY CLUSTER'). + Exception: If points contain mixed dimensionality + ('ILLEGAL: MULTISPACE CLUSTER'). + """ # We forbid empty Clusters (they don't make mathematical sense!) if len(points) == 0: raise Exception("ILLEGAL: EMPTY CLUSTER") self.points = points @@ -41,19 +89,44 @@ def __init__(self, points): if p.n != self.n: raise Exception("ILLEGAL: MULTISPACE CLUSTER") # Figure out what the centroid of this Cluster should be self.centroid = self.calculateCentroid() + # Return a string representation of this Cluster def __repr__(self): + """Return a string representation of the list of Points in this cluster.""" return str(self.points) + # Update function for the K-means algorithm # Assigns a new list of Points to this Cluster, returns centroid difference def update(self, points): + """Replace this cluster's points and return how far the centroid moved. + + Used during each iteration of the K-means algorithm to reassign points + and measure convergence. + + Args: + points: A new list of Point objects to assign to this cluster. + + Returns: + The Euclidean distance between the old centroid and the new + centroid after recalculation. 
+ """ old_centroid = self.centroid self.points = points self.centroid = self.calculateCentroid() return getDistance(old_centroid, self.centroid) + # Calculates the centroid Point - the centroid is the sample mean Point # (in plain English, the average of all the Points in the Cluster) def calculateCentroid(self): + """Compute and return the centroid of the current cluster points. + + The centroid is the coordinate-wise arithmetic mean of all Points + in the cluster. + + Returns: + A new Point whose coordinates are the mean of each dimension + across all points in the cluster. + """ centroid_coords = [] # For each coordinate: for i in range(self.n): @@ -67,6 +140,23 @@ def calculateCentroid(self): # -- Return Clusters of Points formed by K-means clustering def kmeans(points, k, cutoff): + """Cluster points into k groups using the K-means algorithm. + + Randomly selects k seed points and iteratively reassigns every point to + the nearest cluster centroid, updating centroids after each round. Stops + when the largest centroid shift in a single iteration falls below cutoff. + + Args: + points: A list of Point objects to cluster. All points must have the + same dimensionality. + k: The number of clusters to form. + cutoff: A float convergence threshold. Iteration stops when the + maximum centroid displacement across all clusters is less than + this value. + + Returns: + A list of k Cluster objects containing the final cluster assignments. + """ # Randomly sample k Points from the points list, build Clusters around them initial = random.sample(points, k) clusters = [] @@ -104,6 +194,20 @@ def kmeans(points, k, cutoff): ###### # -- Get the Euclidean distance between two Points def getDistance(a, b): + """Return the Euclidean distance between two Points. + + Args: + a: A Point object. + b: A Point object in the same dimensional space as a. + + Returns: + A float representing the Euclidean (straight-line) distance between + the two points. 
+ + Raises: + Exception: If a and b have different numbers of dimensions + ('ILLEGAL: NON-COMPARABLE POINTS'). + """ # Forbid measurements between Points in different spaces if a.n != b.n: raise Exception("ILLEGAL: NON-COMPARABLE POINTS") # Euclidean distance between a and b is sqrt(sum((a[i]-b[i])^2) for all i) @@ -117,6 +221,17 @@ def getDistance(a, b): ########### # -- Create a random Point in n-dimensional space def makeRandomPoint(n, lower, upper): + """Create a Point with n random coordinates drawn uniformly from [lower, upper]. + + Args: + n: The number of dimensions for the new point. + lower: The lower bound of the uniform distribution. + upper: The upper bound of the uniform distribution. + + Returns: + A Point object with n coordinates each sampled from + random.uniform(lower, upper). + """ coords = [] for i in range(n): coords.append(random.uniform(lower, upper)) return Point(coords) @@ -125,6 +240,15 @@ def makeRandomPoint(n, lower, upper): #Main ############## def main(args): + """Run a demo K-means clustering on randomly generated 2-D points. + + Creates 10 random points in 2-D space within [-200, 200] and clusters + them into 3 groups with a convergence cutoff of 0.5, then prints the + points and resulting clusters to stdout. + + Args: + args: Command-line argument list (not currently used). + """ num_points, n, k, cutoff, lower, upper = 10, 2, 3, 0.5, -200, 200 # Create num_points random Points in n-dimensional space points = [] diff --git a/src/seqlib/continuousData.py b/src/seqlib/continuousData.py index 7895d34..dd64923 100644 --- a/src/seqlib/continuousData.py +++ b/src/seqlib/continuousData.py @@ -1,8 +1,18 @@ -''' -Created on Jun 30, 2009 -First attempt at a data structure for high-resolution genome-wide data -@author: lgoff -''' +"""High-resolution genome-wide continuous data storage structures. 
+ +Provides ContinuousData for per-nucleotide or binned coverage arrays on a +single chromosome, and SimpleChIPData for loading, normalising, and scanning +multi-sample NimbleGen ChIP data. + +Note: SimpleChIPData depends on rpy2 and tables (PyTables) as well as the +Chip module from this package. + +First attempt at a data structure for high-resolution genome-wide data. + +Originally created on Jun 30, 2009. + +Author: lgoff +""" import gzip import sys @@ -14,14 +24,32 @@ class ContinuousData(object): - ''' - Data storage object that is specific to a single chromosome - ''' + """Per-chromosome continuous (coverage) data storage backed by numpy arrays. + + Stores strand-separated floating-point data at a configurable bin + resolution. Supports interval-based data accumulation, range extraction, + and gzipped binary serialisation. + + Attributes: + name: Sample name string. + chr: Chromosome name (must be in genomelib.chr_lengths). + binSize: Resolution in base pairs per bin (default 1). + fname: Default filename for binary output. + data: Dict with "+" and "-" keys mapping to numpy float64 arrays. + """ def __init__(self,name,chr,binSize = 1,data = {}): - ''' - Constructor: Creates instance specifically tailored to a given chromosome - ''' + """Construct a ContinuousData object for a single chromosome. + + If data is non-empty, it is used directly. Otherwise, two zero-filled + numpy arrays of length chr_length // binSize are created. + + Args: + name: Sample name string. + chr: Chromosome name string (must be in genomelib.chr_lengths). + binSize: Bin size in base pairs (default 1). + data: Optional pre-existing dict with "+" and "-" numpy arrays. 
+ """ self.name = name self.chr = chr self.binSize = int(binSize) @@ -35,37 +63,99 @@ def __init__(self,name,chr,binSize = 1,data = {}): } def __len__(self): - """Equivalent to length of the genome""" + """Return the number of bins, equivalent to the chromosome length in bins.""" return np.alen(self.data['+']) def __repr__(self): + """Return the sample name string.""" return self.name def __str__(self): + """Return the sample name string.""" return self.name def getMin(self,strand): + """Return the minimum value in the data array for the given strand. + + Args: + strand: "+" or "-". + + Returns: + Minimum float value in self.data[strand]. + """ return np.amin(self.data[strand]) def getMax(self,strand): + """Return the maximum value in the data array for the given strand. + + Args: + strand: "+" or "-". + + Returns: + Maximum float value in self.data[strand]. + """ return np.amax(self.data[strand]) def whichMax(self,strand): + """Return the bin index of the maximum value for the given strand. + + Args: + strand: "+" or "-". + + Returns: + Integer index of the maximum element in self.data[strand]. + """ return np.argmax(self.data[strand]) def whichMin(self,strand): + """Return the bin index of the minimum value for the given strand. + + Args: + strand: "+" or "-". + + Returns: + Integer index of the minimum element in self.data[strand]. + """ return np.argmin(self.data[strand]) def getDataRange(self,strand,start,end): + """Return the data array slice corresponding to a genomic coordinate range. + + Args: + strand: "+" or "-". + start: Genomic start coordinate. + end: Genomic end coordinate. + + Returns: + Numpy array slice of self.data[strand] for the given range. + """ return self.data[strand][(start//self.binSize)-1:(end//self.binSize)-1] def addInterval(self,interval): + """Accumulate an interval's count into the data arrays. + + Adds interval.count to each bin covered by the interval on its strand. 
+ Does nothing if the interval's chromosome does not match self.chr. + + Args: + interval: An object with chr, strand, start, end, and count + attributes. + + Returns: + The string "Wrong data file" if interval.chr != self.chr, + otherwise None. + """ if self.chr != interval.chr: return "Wrong data file" else: self.data[interval.strand][(interval.start//self.binSize)-1:(interval.end//self.binSize)-1]=self.data[interval.strand][(interval.start//self.binSize)-1:(interval.end//self.binSize)-1]+interval.count def write(self,fname=None): + """Write data arrays to a gzipped binary file. + + Args: + fname: Output file path. Defaults to self.fname if not provided. + """ if fname == None: fname = self.fname fd = gzip.open(fname,'wb') @@ -74,19 +164,65 @@ def write(self,fname=None): fd.close() def read(self,fname): + """Read data from a file (not yet implemented). + + Args: + fname: Path to the file to read from. + """ pass def innerHeight(self,strand,start,end): + """Return the maximum value (peak height) within a genomic range. + + Args: + strand: "+" or "-". + start: Genomic start coordinate. + end: Genomic end coordinate. + + Returns: + Maximum float value in the data range. + """ region = self.getDataRange(strand,start,end) return np.amax(region) def outerHeight(self,strand,start,end): + """Return the total signal (sum) within a genomic range. + + Args: + strand: "+" or "-". + start: Genomic start coordinate. + end: Genomic end coordinate. + + Returns: + Sum of all values in the data range. + """ region = self.getDataRange(strand,start,end) return sum(region) class SimpleChIPData(object): + """Multi-sample NimbleGen ChIP-chip data container with normalisation and scanning. + + Loads NimbleGen GFF probe files, applies quantile normalisation via + limma, joins probes into intervals, and scans intervals with a sliding + window test. + + Attributes: + data: Dict mapping sample name to list of probe Intervals. + samples: List of sample name strings in load order. 
+ dataMatrix: 2D numpy float array of probe scores (set by makeMatrix). + normMatrix: 2D numpy array of quantile-normalised scores (set by + quantileNormalize). + intervals: Dict mapping sample name to list of joined Intervals (set + by joinProbes). + """ def __init__(self,files): + """Load NimbleGen GFF files and initialise the data store. + + Args: + files: List of GFF file paths to load. Each file's sample name is + derived by stripping the ".gff" extension. + """ self.data = {} self.samples = [] for fname in files: @@ -96,12 +232,28 @@ def __init__(self,files): self.data[sampleName] = Chip.parseNimblegen(fname) def doIt(self,permuted,windows=[5,6,7,8,9,10,11,12],threshold=0.05): + """Run the full normalise-join-scan pipeline. + + Calls normalize(), joinProbes(), and then scan() for each window size. + + Args: + permuted: Permuted score data passed to scan() for significance + testing. + windows: List of window sizes to scan (default [5..12]). + threshold: Significance threshold for scanning (default 0.05). + """ self.normalize() self.joinProbes() for winSize in windows: self.scan(permuted,winSize,threshold) def makeMatrix(self): + """Build self.dataMatrix from probe scores across all samples. + + Creates a 2D numpy float array of shape (n_probes, n_samples) where + each column contains the scores for one sample in probe order. + Writes a progress message to stderr on completion. + """ data_keys = list(self.data.keys()) self.dataMatrix = np.empty((len(self.data[data_keys[0]]),len(self.samples)),'f') for i in range(0,len(data_keys)): @@ -109,12 +261,22 @@ def makeMatrix(self): sys.stderr.write("Created dataMatrix!\n") def quantileNormalize(self): + """Apply quantile normalisation to self.dataMatrix using limma. + + Calls makeMatrix() first if dataMatrix is not yet set. Requires the + R limma package. Stores the result in self.normMatrix. 
+ """ if 'dataMatrix' not in self.__dict__: self.makeMatrix() rpy.r.library("limma") sys.stderr.write("Performing Quantile Normalization...\n") self.normMatrix = rpy.r.normalizeQuantiles(self.dataMatrix) def normalize(self): + """Replace probe scores with quantile-normalised values. + + Calls quantileNormalize() first if normMatrix is not yet set. Updates + the score attribute of every probe object in self.data in-place. + """ if 'normMatrix' not in self.__dict__: self.quantileNormalize() sys.stderr.write("Replacing values in data with normalized values...\n") data_keys = list(self.data.keys()) @@ -123,6 +285,11 @@ def normalize(self): self.data[data_keys[i]][j].score = self.normMatrix[j,i] def joinProbes(self): + """Join adjacent probes into contiguous intervals for each sample. + + Populates self.intervals dict via Chip.joinNimblegenIntervals(). + Writes per-sample progress messages to stderr. + """ sys.stderr.write("Joining Probes into intervals...\n") self.intervals = {} for sample in self.samples: @@ -130,6 +297,17 @@ def joinProbes(self): self.intervals[sample] = Chip.joinNimblegenIntervals(self.data[sample]) def scan(self,permuted,windowSize,threshold=0.05): + """Scan all intervals with a sliding window test of the given size. + + Calls i.scan(permuted, windowSize, threshold) on every interval in + every sample. Writes progress messages to stderr. + + Args: + permuted: Permuted score data used by the interval scan method for + significance testing. + windowSize: Integer number of probes per sliding window. + threshold: Significance threshold (default 0.05). + """ sys.stderr.write("Scanning with window of size %d..\n" % windowSize) for sample in self.samples: sys.stderr.write("\t%s\n" % sample) diff --git a/src/seqlib/converters.py b/src/seqlib/converters.py index d9009a4..3a0266c 100644 --- a/src/seqlib/converters.py +++ b/src/seqlib/converters.py @@ -1,12 +1,30 @@ ''' +File format conversion utilities for genomic annotation files. 
+ +Contains functions for converting between common bioinformatics file formats +such as BED and GTF. + Created on Mar 17, 2011 @author: lgoff ''' # from misc import rstrips # rasmus library removed - not Python 3.12 compatible -def bed2GTF(fname,outfile=None): - """This does not work yet""" +def bed2GTF(fname, outfile=None): + """Convert a BED file to GTF format (not yet fully implemented). + + Opens the input BED file, writes comment lines and track/browser header + lines through unchanged, and parses remaining tab-delimited lines. The + actual record conversion logic is not yet implemented. + + Note: This function is incomplete and does not currently produce GTF + output records. + + Args: + fname: Path to the input BED file. + outfile: Path for the output GTF file. Defaults to fname with the + trailing '.bed' stripped and '.gtf' appended. + """ handle = open(fname,'r') if outfile == None: outfile = fname.rstrip('.bed')+'.gtf' diff --git a/src/seqlib/dbConn.py b/src/seqlib/dbConn.py index a084380..50c28d1 100644 --- a/src/seqlib/dbConn.py +++ b/src/seqlib/dbConn.py @@ -1,4 +1,15 @@ #!/usr/bin/env python +"""Database connection helpers and genomic data retrieval utilities. + +Provides connection factories for several MySQL databases (Broad Institute +internal, UCSC Genome Browser public mirror, local UCSC mirror on 'valor', and +Ensembl) and a collection of query functions for fetching RefSeq transcripts, +wgRNA annotations, CpG islands, repeat overlaps, lincRNA records, and miRNA +seed sequences. + +Most connection functions require network access to specific internal or public +servers and appropriate credentials. +""" import sys import time @@ -14,6 +25,13 @@ # ################### def broadConnect(): + """Opens a DictCursor connection to the Broad Institute MySQL database. + + Connects to the lgoff_nextgen schema on mysql.broadinstitute.org. + + Returns: + A MySQLdb DictCursor for the lgoff_nextgen database. 
+ """ host="mysql.broadinstitute.org" user="lgoff" password="" @@ -27,6 +45,14 @@ def broadConnect(): # ################### def gbdbConnect(gbdbname = "hg18"): + """Opens a DictCursor connection to the UCSC Genome Browser public MySQL mirror. + + Args: + gbdbname: UCSC genome database name (default: 'hg18'). + + Returns: + A MySQLdb DictCursor for the specified UCSC genome database. + """ gbHost = "genome-mysql.cse.ucsc.edu" gbUser = "genome" gbdb = MySQLdb.connect(host=gbHost,user=gbUser,db=gbdbname) @@ -38,6 +64,17 @@ def gbdbConnect(gbdbname = "hg18"): # ################### def valorGbdbConnect(gbdbname='hg19'): + """Opens a DictCursor connection to the local UCSC Genome Browser mirror on 'valor'. + + Connects to a locally hosted UCSC mirror database using the root account + without a password. + + Args: + gbdbname: Local UCSC genome database name (default: 'hg19'). + + Returns: + A MySQLdb DictCursor for the specified local genome database. + """ gbHost = 'localhost' gbUser = 'root' gbPass = '' @@ -50,6 +87,14 @@ def valorGbdbConnect(gbdbname='hg19'): # #################### def ensemblConnect(): + """Opens a DictCursor connection to the public Ensembl MySQL server. + + Connects to the homo_sapiens_core_47_36i schema on ensembldb.ensembl.org + using the anonymous account. + + Returns: + A MySQLdb DictCursor for the Ensembl homo_sapiens_core_47_36i database. + """ ensemblHost = "ensembldb.ensembl.org" ensemblUser = "anonymous" ensembldbname = "homo_sapiens_core_47_36i" @@ -78,6 +123,18 @@ def fetchRefSeq(genome = 'hg18',lookupval = 'name'): return output def fetchRefSeqIntervals(genome = 'hg18'): + """Returns a dictionary of RefSeq SplicedInterval objects keyed by transcript name. + + Queries the refGene table of the UCSC Genome Browser database and + constructs an intervallib.SplicedInterval for each transcript. + + Args: + genome: UCSC genome database name (default: 'hg18'). 
+ + Returns: + A dictionary mapping RefSeq transcript names to SplicedInterval + objects. + """ cursor = gbdbConnect(gbdbname=genome) select = "SELECT * from refGene" cursor.execute(select) @@ -146,6 +203,23 @@ def fetchRefSeqIntervalsIndexed(genome='hg18',proteinCodingOnly=False,verbose=Fa return output def getIntervalFromRefSeq(lookupval,genome='hg18',lookupkey= 'name2',verbose=False): + """Returns SplicedInterval objects for RefSeq transcripts matching a lookup value. + + Queries the UCSC refGene table for rows where lookupkey equals lookupval + and constructs an intervallib.SplicedInterval for each matching transcript. + + Args: + lookupval: The value to search for (e.g. a gene symbol or transcript + ID). + genome: UCSC genome database name (default: 'hg18'). + lookupkey: refGene column to search against (default: 'name2', which + corresponds to the gene symbol). + verbose: If True, print the SQL query and row count to stderr + (default: False). + + Returns: + A list of SplicedInterval objects for the matching transcripts. + """ cursor = gbdbConnect(gbdbname=genome) select = """SELECT * FROM refGene WHERE %s = '%s'""" % (lookupkey,lookupval) if verbose: @@ -170,6 +244,22 @@ def getIntervalFromRefSeq(lookupval,genome='hg18',lookupkey= 'name2',verbose=Fal return output def getIntervalFromAll_mRNA(lookupval,genome='hg18',lookupkey='qName',verbose=False): + """Returns SplicedInterval objects from the UCSC all_mrna alignment table. + + Queries the all_mrna table for mRNA BLAT alignments matching lookupval + in the specified column, and constructs a SplicedInterval for each row. + + Args: + lookupval: The value to search for (e.g. a GenBank accession). + genome: UCSC genome database name (default: 'hg18'). + lookupkey: all_mrna column to search (default: 'qName', the query + sequence name). + verbose: If True, print the SQL query and row count to stderr + (default: False). + + Returns: + A list of SplicedInterval objects for the matching alignments. 
+ """ cursor = gbdbConnect(gbdbname=genome) select = """SELECT * FROM all_mrna WHERE %s = '%s'""" % (lookupkey,lookupval) if verbose: @@ -211,6 +301,15 @@ def refseqTSS(): return output def fetchwgRNA(): + """Returns all wgRNA entries from the UCSC Genome Browser indexed by chromosome, strand, and name. + + Queries the wgRna table of the default genome (hg18) and organises + results into a nested dictionary structure. + + Returns: + A dictionary of the form output[chr][strand][name] = row_dict for + each wgRNA entry on a standard chromosome. + """ cursor=gbdbConnect() select="SELECT * FROM wgRna" cursor.execute(select) @@ -246,6 +345,20 @@ def hostRefSeq(chr,start,end,strand): return results def testCpG(chr,start,end): + """Tests whether a genomic interval overlaps a CpG island in the UCSC database. + + Queries the cpgIslandExt table for CpG islands that overlap the given + coordinates. + + Args: + chr: Chromosome name (e.g. 'chr1'). + start: Start coordinate (0-based). + end: End coordinate. + + Returns: + The first matching row as a dictionary, or False if no CpG island + overlaps the interval. + """ cursor=gbdbConnect() selSQL="SELECT * from cpgIslandExt WHERE chrom='%s' AND chromStart<='%d' AND chromEnd>='%d'" % (chr,int(start),int(end)) cursor.execute(selSQL) @@ -273,6 +386,21 @@ def testwgRNA(chr,start,end,strand): return results def hostmRNA(chr,start,end,strand): + """Returns mRNA alignments that span a given genomic interval from the UCSC database. + + Queries a chromosome-specific mRNA table (named _mrna) for + alignments that contain the interval [start, end]. + + Args: + chr: Chromosome name (e.g. 'chr1'). + start: Start coordinate of the query interval. + end: End coordinate of the query interval. + strand: Strand orientation (not currently used in the SQL query). + + Returns: + A list of row dictionaries for overlapping mRNA alignments, or False + if none are found. 
+ """ cursor=gbdbConnect() selSQL="SELECT * from %s_mrna WHERE tName='%s' AND tStart<='%d' AND tEnd>='%d'" % (chr,chr,int(start),int(end)) cursor.execute(selSQL) @@ -286,6 +414,19 @@ def hostmRNA(chr,start,end,strand): return results def fetchLincRNA(fname="/seq/compbio/lgoff/lincRNAs/hg18_lincRNA_Guttman.bed"): + """Reads a lincRNA BED file and returns intervals indexed by chromosome. + + Parses a three-column BED file (chr, start, end) and organises the + resulting intervals into a dictionary keyed by chromosome name. + + Args: + fname: Path to a BED file of lincRNA intervals (default: hg18 + Guttman et al. lincRNA catalogue). + + Returns: + A dictionary mapping chromosome names to lists of interval + dictionaries, each with keys 'chr', 'start' (int), and 'end' (int). + """ handle=open(fname,'r') lincs={} for chr in genomelib.chr_names: @@ -300,6 +441,21 @@ def fetchLincRNA(fname="/seq/compbio/lgoff/lincRNAs/hg18_lincRNA_Guttman.bed"): return lincs def fetchmiRNASeeds(fname="/seq/compbio/lgoff/smallRNAs/genomes/human/microRNA/mature.fa",species = 'hsa'): + """Reads a miRBase FASTA file and returns a dictionary mapping seed sequences to miRNA names. + + Extracts the 7-nt seed sequence (positions 2-8 of the mature miRNA) for + each entry matching the given species prefix. + + Args: + fname: Path to a miRBase mature miRNA FASTA file (default: internal + Broad Institute path). + species: Two- or three-letter miRBase species prefix to filter by + (default: 'hsa' for Homo sapiens). + + Returns: + A dictionary mapping 7-nt seed sequences (str) to the first token + of the miRNA name (str). + """ handle = open(fname,'r') seeds = {} iter = sequencelib.FastaIterator(handle) @@ -313,6 +469,21 @@ def fetchmiRNASeeds(fname="/seq/compbio/lgoff/smallRNAs/genomes/human/microRNA/m ############ def findRepeatOverlap(interval,cursor=None): + """Returns RepeatMasker annotations that overlap a given genomic interval. 
+ + Queries the rmsk table of the local UCSC mirror for repeat elements that + partially or fully overlap the interval. + + Args: + interval: An intervallib interval object with chr, start, end, and + genome attributes. + cursor: An optional pre-existing MySQLdb DictCursor. If None, a new + connection to the local valor UCSC mirror is opened. + + Returns: + A list of row dictionaries for overlapping repeats, or False if none + are found. + """ if cursor == None: cursor = valorGbdbConnect(interval.genome) selSQL = "SELECT * from rmsk WHERE genoName = '%s' AND (genoStart >= '%d' OR genoEnd >= '%d') AND (genoStart <= '%d' OR genoEnd <= '%d')" % (interval.chr,interval.start,interval.start,interval.end,interval.end) @@ -327,6 +498,21 @@ def findRepeatOverlap(interval,cursor=None): return results def findUCSCOverlap(interval,cursor=None): + """Returns UCSC knownGene entries (with RefSeq mapping) that overlap a given interval. + + Queries the knownGene table joined to knownToRefSeq on the local UCSC + mirror for known genes that partially or fully overlap the interval. + + Args: + interval: An intervallib interval object with chr, start, end, and + genome attributes. + cursor: An optional pre-existing MySQLdb DictCursor. If None, a new + connection to the local valor UCSC mirror is opened. + + Returns: + A list of row dictionaries for overlapping known genes, or False if + none are found. 
+ """ if cursor == None: cursor = valorGbdbConnect(interval.genome) selSQL = "SELECT * from knownGene kg LEFT JOIN knownToRefSeq krs ON kg.name = krs.name WHERE kg.chrom = '%s' AND (kg.txStart >= '%d' OR kg.txEnd >= '%d') AND (kg.txStart <= '%d' OR kg.txEnd <= '%d')" % (interval.chr,interval.start,interval.start,interval.end,interval.end) diff --git a/src/seqlib/genomelib.py b/src/seqlib/genomelib.py index 1cf0d84..d0b712b 100644 --- a/src/seqlib/genomelib.py +++ b/src/seqlib/genomelib.py @@ -1,10 +1,20 @@ -''' -Created on Aug 28, 2010 +"""Genome-level utilities and constants for human genome builds. -This is a port of the genome.py module from seqtools (it is a work in progress) +Contains chromosome names, lengths, and base frequencies for hg18, along with +helper functions for fetching genome sequences (via pygr), generating random +genomic regions, building repeat-masker and refGene NLMSA indices, and +checking whether a sequence is soft-masked. -@author: lgoff -''' +Note: Functions that depend on pygr (pygrConnect, build_rmsk_nlmsa, +refGene_nlmsa, fetchSequence) are non-functional in Python 3 because pygr +is a Python 2-only library. + +This is a port of the genome.py module from seqtools (work in progress). + +Originally created on Aug 28, 2010. + +Author: lgoff +""" ############ #Imports ############ @@ -94,6 +104,19 @@ #Functions ####### def fetch_genbases(genhandle,genbases={}): + """Count occurrences of each nucleotide across an entire genome FASTA file. + + Iterates over all sequences in the FASTA file and tallies A, T, G, C, and N + counts. Results are accumulated into the genbases dict. + + Args: + genhandle: An open file handle to a genome FASTA file. + genbases: Optional dict to accumulate counts into (default new dict). + Mutated in-place and also returned. + + Returns: + Dict mapping each base character to its total integer count. 
+ """ bases = ['A','T','G','C','N'] geniter = sequencelib.FastaIterator(genhandle) for genseq in geniter: @@ -123,6 +146,17 @@ def random_region(n,m=1): return c, start, end, strand def isMasked(s): + """Return True if the sequence contains any soft-masked or N characters. + + Soft-masked characters are lowercase a, c, t, g, and n, plus uppercase N. + + Args: + s: DNA sequence string. + + Returns: + True if any character in s is in the set {a, c, t, g, n, N}, + False otherwise. + """ maskedChars='actgnN' for c in s: if c in maskedChars: @@ -136,6 +170,25 @@ def isMasked(s): #SeqPath = pygr.Data.Bio.Seq.Genome.HUMAN.hg18 def pygrConnect(genome="hg18",useWorldbase = False): + """Return a pygr genome sequence database handle for the given build. + + Note: pygr is a Python 2-only library and is not available in Python 3. + This function will raise an ImportError or NameError at call time in + Python 3 environments. + + Args: + genome: Genome build identifier string. Supported values: "hg18", + "hg19", "mm9", "mm8" (worldbase only for mm8). + useWorldbase: If True, connect via pygr's worldbase service. If + False (default), open the local FASTA file via SequenceFileDB. + + Returns: + A pygr SequenceFileDB or worldbase genome object supporting + chromosome-level sequence access. + + Raises: + AssertionError: If genome is not recognised. + """ if useWorldbase: if genome == "hg18": res=worldbase.Bio.Seq.Genome.HUMAN.hg18() @@ -161,20 +214,50 @@ def pygrConnect(genome="hg18",useWorldbase = False): #pygr annotation layers #This is very closely tied to valor class UCSCStrandDescr(object): + """A descriptor that converts UCSC strand strings to pygr orientation ints. + + Returns 1 for "+" strand and -1 for all other strands. Intended to be + used as a class attribute on sqlgraph row classes. + """ def __get__(self, obj, objtype): + """Return orientation integer for the row object's strand. + + Args: + obj: The row instance whose strand attribute is read. 
+ objtype: The owner class (unused). + + Returns: + 1 if obj.strand == "+", otherwise -1. + """ if obj.strand == '+': return 1 else: return -1 class UCSCSeqIntervalRow(sqlgraph.TupleO): + """A sqlgraph TupleO row class for UCSC interval tables. + + Adds an orientation attribute via UCSCStrandDescr, converting the + strand column to a pygr-compatible +1/-1 integer. + """ orientation = UCSCStrandDescr() serverInfo = sqlgraph.DBServerInfo(host='localhost',user='root',passwd='') def build_rmsk_nlmsa(genome="hg19"): + """Build a pygr NLMSA index for the RepeatMasker annotation table. + + Connects to the local UCSC MySQL server, creates an AnnotationDB over + the rmsk table, and writes the NLMSA index to disk for later use. + + Note: Requires a running local MySQL server with the UCSC schema and + pygr installed (Python 2 only). + + Args: + genome: Genome build string (default "hg19"). + """ #This is horse shit... - + seqDB = pygrConnect(genome) rmsk = sqlgraph.SQLTable('hg19.rmsk',serverInfo=serverInfo,itemClass=UCSCSeqIntervalRow,primaryKey="lookupName") annodb = annotation.AnnotationDB(rmsk, @@ -191,9 +274,23 @@ def build_rmsk_nlmsa(genome="hg19"): al.build() def refGene_nlmsa(genome="hg19"): + """Return a pygr NLMSA index for the refGene annotation table. + + Attempts to load a pre-built NLMSA from disk. If not found, builds one + from the local UCSC MySQL refGene table and saves it to disk. + + Note: Requires a running local MySQL server with a 'lookupName' primary + key added to the refGene table, and pygr installed (Python 2 only). + + Args: + genome: Genome build string (default "hg19"). + + Returns: + A cnestedlist.NLMSA object opened in read mode. + """ #Needed to add primary key 'lookupName' to hg19.refGene for this to work (pygr requires unique ids for an annotation) #This is really CRAP....I don't know how or why anyone will every be able to use this.... 
- + try: al = cnestedlist.NLMSA('/n/rinn_data1/indexes/human/'+genome+'/refGene/refGene_'+genome,'r') except: @@ -223,6 +320,20 @@ def refGene_nlmsa(genome="hg19"): #MISC ################ def fetchSequence(chrom,start,end,strand,genome="hg18"): + """Fetch a genomic sequence from the specified region using pygr. + + Note: Requires pygr (Python 2 only). + + Args: + chrom: Chromosome name string (e.g. "chr1"). + start: Start coordinate (0-based, integer). + end: End coordinate (integer). + strand: Strand string; if "-" the reverse complement is returned. + genome: Genome build string (default "hg18"). + + Returns: + A pygr sequence object for the requested region. + """ connection=pygrConnect(genome) start,end=int(start),int(end) seq=connection[chrom][start:end] diff --git a/src/seqlib/gibson.py b/src/seqlib/gibson.py index 4223ca3..87a3367 100644 --- a/src/seqlib/gibson.py +++ b/src/seqlib/gibson.py @@ -1,10 +1,14 @@ -''' -Created on Sep 19, 2012 +"""Tools for designing Gibson Assembly fragments from FASTA sequences. -Script to create gibson assembly fragments for ordering from a fasta file. +Reads a FASTA file of sequences (e.g. cDNAs or genomic regions) and splits +each into overlapping fragments suitable for Gibson Assembly cloning. +Optionally prepends Gateway attB recombination sequences to the outermost +primers. Fragments are written in a tab-delimited or pretty-printed format. -@author: lgoff -''' +Usage:: + + python gibson.py [options] +""" #Imports import getopt import sys @@ -31,10 +35,42 @@ ''' class Usage(Exception): + """Exception raised for command-line usage errors. + + Attributes: + msg: Human-readable explanation of the error or the help message. + """ def __init__(self, msg): + """Initialises a Usage exception with an error message. + + Args: + msg: Human-readable error or help text. + """ self.msg = msg def gibson(fname,gateway=True,fragSize=500,overhangSize=20): + """Splits FASTA sequences into overlapping Gibson Assembly fragments. 
+ + Reads each record from a FASTA file and divides its sequence into a series + of fragments of approximately fragSize bp, with consecutive fragments + overlapping by overhangSize bp. When gateway is True, the Gateway attB + forward site (attF) is prepended to the sequence and the reverse + complement of the Gateway attB reverse site (attR) is appended before + fragmentation. + + Args: + fname: Path to a FASTA-format input file. + gateway: If True, add Gateway attB recombination sequences flanking + the insert before fragmentation (default: True). + fragSize: Target size in base pairs for each Gibson fragment + (default: 500). + overhangSize: Length in base pairs of the overlap between adjacent + fragments (default: 20). + + Returns: + A dictionary mapping each FASTA record name to a list of fragment + sequence strings in 5'-to-3' order. + """ res = {} #Fasta file handle @@ -63,6 +99,17 @@ def gibson(fname,gateway=True,fragSize=500,overhangSize=20): return res def printGibson(fragDict,outHandle): + """Writes Gibson Assembly fragments to a file handle in tab-delimited format. + + For each sequence in fragDict, prints a header line with the sequence name + followed by one line per fragment in the format: + _block\\t + + Args: + fragDict: Dictionary mapping sequence names to lists of fragment + sequence strings, as returned by gibson(). + outHandle: Writable file-like object to receive the output. + """ for k in fragDict.keys(): print("%s:" % k, file=outHandle) blockCount = 0 @@ -77,6 +124,18 @@ def printGibson(fragDict,outHandle): # Main ############## def main(argv=None): + """Command-line entry point for the Gibson Assembly fragment designer. + + Parses command-line arguments, calls gibson() to generate fragments from + the provided FASTA file, and writes the results with printGibson(). + + Args: + argv: List of command-line argument strings. Defaults to sys.argv + when None. + + Raises: + SystemExit: On usage errors or when --help is requested. 
+ """ if argv is None: argv = sys.argv verbose = False diff --git a/src/seqlib/go.py b/src/seqlib/go.py index 0d3f1ba..a7855e2 100644 --- a/src/seqlib/go.py +++ b/src/seqlib/go.py @@ -1,10 +1,30 @@ +"""Gene Ontology (GO) database parsing and traversal utilities. + +Provides classes and functions for loading a Gene Ontology OBO-XML file, +representing GO terms, and traversing the GO DAG to retrieve all ancestor +terms for a given GO accession. Includes deprecated tab-delimited annotation +file readers. +""" import xml.sax.handler from xml.sax import make_parser from xml.sax.handler import feature_namespaces def readGo(filename): - """DEPRECATED""" + """Reads a tab-delimited GO annotation file and returns a mapping of gene IDs to GO terms. + + DEPRECATED: This function relies on the Python 2 built-in file() and the + non-standard Dict class. It is retained for historical reference only. + + Args: + filename: Path to a tab-delimited GO annotation file where column 0 + contains the gene/feature identifier and column 4 contains the + GO term. Lines containing 'GI:' are skipped. + + Returns: + A Dict (default list) mapping gene identifiers to lists of GO term + strings. + """ terms = Dict(default=[]) for line in file(filename): @@ -20,7 +40,19 @@ def readGo(filename): def readCommonNames(filename): - """DEPRECATED""" + """Reads a tab-delimited file mapping identifiers to common gene names. + + DEPRECATED: Relies on the Python 2 built-in file(). Retained for + historical reference only. + + Args: + filename: Path to a two-column tab-delimited file where column 0 is + the primary identifier and column 1 is the common name ('-' + entries are skipped). + + Returns: + A dictionary mapping primary identifiers to common name strings. + """ commonNames = {} for line in file(filename): @@ -33,7 +65,18 @@ def readCommonNames(filename): class GoTerm: + """Represents a single Gene Ontology term. + + Attributes: + accession: GO accession string (e.g. 'GO:0008150'). 
+ name: Human-readable term name (e.g. 'biological process'). + definition: Textual definition of the term. + is_a: List of parent GO accession strings linked by 'is_a' relations. + part_of: List of parent GO accession strings linked by 'part_of' + relations. + """ def __init__(self): + """Initialises a GoTerm with empty/default attribute values.""" self.accession = "" self.name = "" self.definition = "" @@ -42,21 +85,56 @@ def __init__(self): # self.synonym = [] class AllTerm(GoTerm): + """Synthetic top-level GO term used as the root of the GO hierarchy. + + AllTerm has a fixed accession and name of 'all' and is added to the + GoDatabase after parsing to provide a single root node for traversal. + """ def __init__(self): + """Initialises AllTerm with accession='all' and name='all'.""" GoTerm.__init__(self) - + self.accession = "all" self.name = "all" - self.defintion = "top-level term" + self.defintion = "top-level term" class GoHandler(xml.sax.handler.ContentHandler): + """SAX content handler for parsing Gene Ontology OBO-XML files. + + Builds a dictionary of GoTerm objects from a GO OBO-XML file as it is + streamed through a SAX parser. Handles go:term, go:is_a, go:part_of, + go:accession, go:name, and go:definition elements. + + Attributes: + terms: Dictionary mapping GO accession strings to GoTerm objects. + term: The GoTerm currently being parsed, or None between terms. + elm: Name of the XML element currently open, used to route character + data to the correct GoTerm attribute. + base: URL prefix for the GO namespace, used to strip absolute URIs + to relative accession strings in is_a and part_of relations. + """ def __init__(self, base): + """Initialises the GoHandler with a namespace base URL. + + Args: + base: URL prefix for the GO namespace + (e.g. 'http://www.geneontology.org/go#'). + """ self.terms = {} self.term = None self.elm = "" self.base = base - + def startElement(self, name, attrs): + """Handles the opening of an XML element during SAX parsing. 
+ + Creates a new GoTerm when a go:term element opens, and appends + parent accessions when go:is_a or go:part_of elements are encountered. + + Args: + name: Local name of the XML element. + attrs: AttributesImpl object providing element attributes. + """ if name == "go:term": self.term = GoTerm() elif name == "go:is_a": @@ -70,11 +148,24 @@ def startElement(self, name, attrs): self.elm = name def endElement(self, name): + """Handles the closing of an XML element during SAX parsing. + + Stores the completed GoTerm in the terms dictionary when a go:term + element closes, and resets the current element tracker. + + Args: + name: Local name of the closing XML element. + """ if name == "go:term": self.terms[self.term.accession] = self.term self.elm = "" def characters(self, text): + """Routes character data to the appropriate attribute of the current GoTerm. + + Args: + text: Character data string from the SAX parser. + """ if self.elm == "go:accession": self.term.accession = text elif self.elm == "go:name": @@ -84,7 +175,22 @@ def characters(self, text): class GoDatabase: + """In-memory representation of a Gene Ontology database loaded from OBO-XML. + + Parses a GO OBO-XML file using SAX and stores all terms in a dictionary + indexed by GO accession. Provides methods for traversing the GO DAG to + retrieve ancestor terms. + + Attributes: + terms: Dictionary mapping GO accession strings to GoTerm objects. + Also includes an 'all' entry (AllTerm) as the synthetic root. + """ def __init__(self, filename): + """Loads and parses a Gene Ontology OBO-XML file. + + Args: + filename: Path to a GO OBO-XML file (e.g. gene_ontology.obo.xml). + """ # Create a parser parser = make_parser() @@ -107,6 +213,28 @@ def __init__(self, filename): def getAllParents(self, goid, touched=None, count=0, ret=True): + """Returns all ancestor GO terms of a given GO accession via BFS. 
+ + Recursively follows is_a and part_of relationships to collect all + ancestor GO accessions in breadth-first discovery order (excluding + the synthetic 'all' root). + + Args: + goid: A GO accession string (e.g. 'GO:0008150') whose ancestors + should be retrieved. + touched: Dictionary used internally to track visited accessions + and their discovery order. Should not be passed by callers. + count: Integer counter used internally during recursion. Should + not be passed by callers. + ret: If True (default), return the sorted list of ancestor + accessions. If False, only populate touched (used during + recursion). + + Returns: + When ret is True, a list of GO accession strings for all ancestors + of goid, ordered by discovery sequence (breadth-first). Returns + None when ret is False. + """ if touched == None: touched = {} diff --git a/src/seqlib/intervallib.py b/src/seqlib/intervallib.py index 6a67827..0f05bc9 100644 --- a/src/seqlib/intervallib.py +++ b/src/seqlib/intervallib.py @@ -1,9 +1,15 @@ #!/usr/bin/env python -''' -Created on Jun 25, 2009 +"""Genomic interval data structures and utilities. -@author: lgoff -''' +Provides the Interval and SplicedInterval classes for representing genomic +regions, along with a collection of functions for parsing BED/FASTA files, +performing interval arithmetic (overlaps, distances, TSS maps), and converting +intervals to various output formats. + +Originally created on Jun 25, 2009. + +Author: lgoff +""" # import genomelib import copy import os @@ -22,10 +28,46 @@ #This is very human-specific at this point class Interval: - """Basic interval class, try to use ChipInterval or SeqInterval if possible... - At this point, the Interval class is rather human specific so avoid calls to self.fetchSequence() or self.getChrNum(), etc... + """Basic genomic interval class. + + Represents a genomic region defined by chromosome, start, end, and strand. + Try to use ChipInterval or SeqInterval if possible. 
At this point, the + Interval class is rather human-specific, so avoid calls to + self.fetchSequence() or self.getChrNum() in non-human contexts. + + Attributes: + chr: Chromosome name (e.g. "chr1"). + start: 0-based start coordinate. + end: End coordinate (inclusive). + strand: Strand orientation ("+", "-", or "*"). + score: Floating-point score; can proxy for read count. + readcount: Integer read count for the interval (-1 if unset). + name: Human-readable name for the interval. + sequence: DNA sequence string for the interval (empty if not fetched). + data: Dictionary of arbitrary key-value metadata. + genome: Genome build identifier (default "hg18"). + TSS: Transcription start site coordinate based on strand. """ def __init__(self, chr, start, end, strand="*", score=0.0, readcount = -1,name="",sequence = "",data={},genome="hg18"): + """Initialize an Interval. + + If the first argument is an existing Interval instance, all attributes + are copied from it (copy constructor behaviour). + + Args: + chr: Chromosome name string, or an existing Interval to copy. + start: 0-based start coordinate. + end: End coordinate (inclusive). + strand: Strand orientation: "+", "-", or "*". + score: Floating-point score (default 0.0). + readcount: Integer read count (default -1 meaning unset). + name: Name string. If empty, a "chr:start-end:strand" label is + auto-generated. + sequence: DNA sequence string (default empty string). + data: Dictionary of arbitrary metadata (default empty dict). + genome: Genome build string used for sequence fetching + (default "hg18"). + """ #Check if creating new instance from old instance as 1st arg if isinstance(chr,Interval): @@ -67,6 +109,14 @@ def __init__(self, chr, start, end, strand="*", score=0.0, readcount = -1,name=" self.endIndex = -1 def getTSS(self): + """Return the transcription start site coordinate. + + Sets and returns self.TSS based on strand: start for "+" strand, + end for "-" strand. 
+ + Returns: + Integer coordinate of the TSS. + """ if self.strand == "+": self.TSS = self.start elif self.strand == "-": @@ -90,7 +140,17 @@ def childScores(self): return [x.score for x in self.children] def makeValMap(self,value = 'readcount'): - """Check these two to see which one is right...""" + """Build a positional value map across the interval from child intervals. + + Creates self.valMap, a numpy array of length len(self) where each + position holds the average of the specified attribute over all child + intervals that cover that position. Positions with no coverage are set + to -1. + + Args: + value: Name of the Interval attribute to average at each position + (default "readcount"). + """ self.valMap = np.zeros(len(self)) self.valMap = self.valMap-1 myTmp = [] @@ -104,32 +164,48 @@ def makeValMap(self,value = 'readcount'): self.valMap[nt]=sum(myTmp[nt])/len(myTmp[nt]) def __iter__(self): + """Iterate over characters in self.sequence.""" return iter(self.sequence) def __getitem__(self,key): + """Return character(s) at index/slice key from self.sequence.""" return self.sequence[key] def __repr__(self): + """Return the interval name, or a chr:start-end:strand string if name is empty.""" if self.name == "": return "%s:%d-%d:%s" % (self.chr,self.start,self.end,self.strand) else: return self.name def __neg__(self): + """Return a new Interval with the strand flipped.""" strandLookup = {"+":"-","-":"+"} newStrand = strandLookup[self.strand] return Interval(self.chr,self.start,self.end,newStrand,self.score,self.readcount) def __len__(self): + """Return the length of the interval in bases (end - start + 1).""" return self.end-self.start+1 def __str__(self): + """Return self.sequence if set, otherwise self.name.""" if self.sequence != "": return self.sequence else: return self.name def __lt__(self, b): + """Compare intervals by chromosomal position. + + Compares first by chromosome number, then by midpoint position. 
+ + Args: + b: Another Interval to compare against. + + Returns: + True if self sorts before b. + """ chr_test_a = self.getChrNum() chr_test_b = b.getChrNum() if chr_test_a != chr_test_b: @@ -139,15 +215,19 @@ def __lt__(self, b): return mid1 < mid2 def __eq__(self, b): + """Return True if self and b have the same chr, start, and end.""" return self.equals(b) def __le__(self, b): + """Return True if self is less than or equal to b.""" return self.__lt__(b) or self.__eq__(b) def __gt__(self, b): + """Return True if self is greater than b.""" return not self.__le__(b) def __ge__(self, b): + """Return True if self is greater than or equal to b.""" return not self.__lt__(b) def windows(self,windowSize): @@ -160,21 +240,53 @@ def toBed(self,value = 'score'): return "%s\t%d\t%d\t%s\t%.2f\t%s" %(self.chr,self.start,self.end,self.name,self.__dict__[value],self.strand) def toUCSC(self): + """Return a UCSC browser region string (chr:start-end). + + Returns: + String formatted as "chr:start-end". + """ return "%s:%d-%d" % (self.chr,self.start,self.end) def toStringNumIGV(self): + """Return an IGV-compatible numeric chromosome and start string. + + Strips the "chr" prefix from the chromosome name. + + Returns: + Tab-delimited string of numeric chromosome and start position. + """ return "%s\t%d" % (self.chr.replace("chr",""),self.start) def toFasta(self): + """Return the interval as a FASTA-formatted string. + + Returns: + String with a FASTA header line followed by self.sequence. + """ return ">%s\n%s" % (self.name,self.sequence) def getString(self): + """Return a chr:start-end:strand string representation. + + Returns: + String formatted as "chr:start-end:strand". + """ return "%s:%d-%d:%s" % (self.chr,self.start,self.end,self.strand) def getScore(self): + """Return self.score. + + Returns: + The floating-point score of the interval. + """ return self.score def getStrand(self): + """Return self.strand. + + Returns: + The strand string ("+", "-", or "*"). 
+ """ return self.strand def mature(self,start,end): @@ -225,7 +337,19 @@ def distanceBetweenTSS(self,b): return False def findDist(self,b): - """ + """Return the signed distance from self's TSS to b's relevant end. + + The relevant end of b depends on each interval's strand: + - self "+" and b "+": b.start - self.TSS + - self "+" and b "-": b.end - self.TSS + - self "-" and b "+": self.TSS - b.start + - self "-" and b "-": self.TSS - b.end + + Args: + b: Another Interval. + + Returns: + Signed integer distance. """ if self.strand == "+" and b.strand == "+": return b.start-self.TSS @@ -261,6 +385,14 @@ def getChrNum(self): else: return self.chr def fetchSequence(self): + """Fetch and store the genomic sequence for this interval via pygr. + + Uses self.genome to connect to the genome database. On "-" strand + the reverse complement is returned. Sets and returns self.sequence. + + Returns: + The DNA sequence string for the interval. + """ if self.genome != "": genome = genomelib.pygrConnect(self.genome) seq = genome[self.chr][self.start-1:self.end] @@ -297,6 +429,20 @@ def getGC(self): return self.gc def getPromoter(self,promUp=2000,promDown=0): + """Return an Interval representing the promoter region of self. + + For "+" strand, the promoter spans [start - promUp, start + promDown]. + For "-" strand, the promoter spans [end - promDown, end + promUp]. + + Args: + promUp: Number of bases upstream of the TSS to include + (default 2000). + promDown: Number of bases downstream of the TSS to include + (default 0). + + Returns: + A new Interval representing the promoter region. + """ if self.strand == "+": align = Interval(self.chr,self.start-promUp,self.start+promDown,self.strand,score=self.score,name=self.name+"_promoter") elif self.strand == "-": @@ -304,6 +450,12 @@ def getPromoter(self,promUp=2000,promDown=0): return align def fold(self): + """Predict RNA secondary structure of self.sequence using RNAfold. + + Runs RNAfold via subprocess on self.sequence. 
Sets self.structure to + the dot-bracket notation and self.mfe to the minimum free energy + (as a float). If parsing fails, both are set to the string "nan". + """ command = "echo '%s' | %s" % (self.sequence,RNAFOLD) output = subprocess.getoutput(command) if len(output.split())>2: @@ -314,15 +466,31 @@ def fold(self): return def getStructureFasta(self): + """Return the predicted RNA structure as a FASTA-formatted string. + + Returns: + String with a FASTA header followed by self.structure in + dot-bracket notation. + """ return ">%s\n%s" % (self.name,self.structure) def isPlus(self): + """Return True if the interval is on the "+" strand. + + Returns: + True if self.strand == "+", otherwise False. + """ if self.strand=="+": return True else: return False def isMinus(self): + """Return True if the interval is on the "-" strand. + + Returns: + True if self.strand == "-", otherwise False. + """ if self.strand=="-": return True else: @@ -342,26 +510,84 @@ def nmer_dictionary(self,n,dic={}): return dic def intersects(self,b,start='start',end='end',offset=0): + """Return True if self and b overlap on the same chromosome and strand. + + Args: + b: Another Interval. + start: Unused parameter name placeholder (default "start"). + end: Unused parameter name placeholder (default "end"). + offset: Optional integer offset added to b.end for looser matching + (default 0). + + Returns: + True if the intervals share chr and strand and their coordinates + overlap (optionally expanded by offset). + """ if self.chr == b.chr and self.strand==b.strand: return not(self.start>b.end+offset or b.start>self.end+offset) else: return False def grow5_prime(self,length): + """Extend the interval by length bases in the 5-prime direction. + + For "+" strand, decreases self.start by length. + For "-" strand, increases self.end by length. + + Args: + length: Number of bases to extend. 
+ """ if self.strand == "+": self.start = self.start-length elif self.strand == "-": self.end = self.end+length def grow3_prime(self,length): + """Extend the interval by length bases in the 3-prime direction. + + For "+" strand, increases self.end by length. + For "-" strand, decreases self.start by length. + + Args: + length: Number of bases to extend. + """ if self.strand == "+": self.end = self.end+length elif self.strand == "-": self.start = self.start-length class SplicedInterval(Interval): - """Extends Interval and Adds/overwrites methods to incorporate spliced elements""" + """Genomic interval with spliced (multi-exon) structure. + + Extends Interval with exon coordinate information parsed from BED12-style + blockSizes and blockStarts fields. Overrides __len__ to return the spliced + (CDS) length rather than the genomic footprint length. + + Attributes: + exonLengths: List of integer exon lengths. + exonOffsets: List of integer exon start offsets relative to self.start. + exonStarts: List of absolute genomic start coordinates for each exon. + exonEnds: List of absolute genomic end coordinates for each exon. + numExons: Number of exons. + """ def __init__(self, chr, start, end, strand="*",exonLengths=[],exonOffsets=[],score=0.0, readcount = -1,name="",sequence = "",data={},genome="hg18"): + """Initialize a SplicedInterval. + + Args: + chr: Chromosome name string. + start: Genomic start coordinate. + end: Genomic end coordinate. + strand: Strand orientation (default "*"). + exonLengths: Comma-separated string of exon lengths (BED12 field). + exonOffsets: Comma-separated string of exon offsets from start + (BED12 field). + score: Floating-point score (default 0.0). + readcount: Integer read count (default -1). + name: Interval name string. + sequence: DNA sequence string. + data: Dictionary of arbitrary metadata. + genome: Genome build string (default "hg18"). 
+ """ Interval.__init__(self,chr,start,end,strand,score=score, readcount = readcount,name=name,sequence = sequence,data=data,genome=genome) self.exonLengths = [int(x) for x in exonLengths.rstrip(",").split(",")] self.exonOffsets = [int(x) for x in exonOffsets.rstrip(",").split(",")] @@ -370,10 +596,15 @@ def __init__(self, chr, start, end, strand="*",exonLengths=[],exonOffsets=[],sco self.numExons = len(self.exonStarts) def __len__(self): + """Return the total spliced (CDS) length of all exons.""" return self.CDSlen() def intervalLen(self): - """Length of genomic footprint for self (ie. end-start+1)""" + """Length of genomic footprint for self (ie. end-start+1) + + Returns: + Integer genomic span from start to end inclusive. + """ return self.end-self.start+1 def CDSlen(self): @@ -722,6 +953,16 @@ def fetchRefSeqByChrom(RefSeqBed="/fg/compbio-t/lgoff/magda/references/human/tra return res def makeTSSBed(fname,outFname): + """Write a BED file of TSS positions derived from another BED file. + + For each interval, the end coordinate is collapsed to the start ("+") or + the start is collapsed to the end ("-") to produce a single-base TSS + interval. + + Args: + fname: Path to the input BED file. + outFname: Path to the output BED file to write. + """ iter = parseBed(fname) outHandle = open(outFname,'w') for i in iter: @@ -733,7 +974,17 @@ def makeTSSBed(fname,outFname): print(myInterval.toBed(), file=outHandle) def parseGalaxyCons(fname): - """Parses bed-like output of conservation fetch from Galaxy webserver""" + """Parse bed-like conservation output from the Galaxy webserver. + + Reads a tab-delimited file where field 6 (index 6) contains the average + phastCons conservation score. + + Args: + fname: Path to the Galaxy conservation BED-like file. + + Yields: + Interval objects with score set to the phastCons value. 
+ """ handle=open(fname,'r') for line in handle: if line.startswith("#"): @@ -754,7 +1005,19 @@ def parseGalaxyCons(fname): yield res def findNearest(myInterval,IntervalList): - """It would be nice to write some sort of binary search for Intervals""" + """Find the nearest interval to myInterval in IntervalList by start distance. + + Performs a linear scan. Only considers intervals with a positive distance + (i.e., intervals that are downstream/to the right of myInterval). + + Args: + myInterval: Reference Interval. + IntervalList: List of Interval objects to search. + + Returns: + The Interval in IntervalList with the smallest positive distance to + myInterval, or 0 if no such interval exists. + """ myDist = 9999999999999999999 res = 0 diff --git a/src/seqlib/lincClonelib.py b/src/seqlib/lincClonelib.py index ea26884..3da3b22 100644 --- a/src/seqlib/lincClonelib.py +++ b/src/seqlib/lincClonelib.py @@ -1,19 +1,19 @@ #!/usr/bin/env python -''' -Created on Aug 19, 2010 +"""Primer design pipeline for lincRNA cloning, qPCR, and in situ hybridisation. + +Wraps the primer3_core command-line tool to design three classes of primers +from FASTA sequences: cloning primers (with optional Gateway attB flanks), +qPCR primers, and in situ hybridisation probe primers. Output can be +formatted as human-readable text or as tab-delimited tables for downstream +processing. Requirements: - - primer3_core + - primer3_core executable on PATH -@author: Loyal Goff +Usage:: -TODO: -- Add bed file output for primers as option -- Integrate a few more primer3 options into commandline - * number of primers - * GC adjustment - * etc... -''' + python lincClonelib.py [options] +""" #from Bio.Emboss import Primer3 import getopt @@ -45,10 +45,41 @@ class Usage(Exception): + """Exception raised for command-line usage errors in lincClonelib. + + Attributes: + msg: Human-readable explanation of the error or the help message. 
+ """ def __init__(self, msg): + """Initialises a Usage exception with an error message. + + Args: + msg: Human-readable error or help text. + """ self.msg = msg def runPrimer3(fastaFile,p3CloneSetFile="/n/rinn_data1/users/lgoff/utils/primer_design/P3_cloning_primer_settings.p3",p3PCRSetFile="/n/rinn_data1/users/lgoff/utils/primer_design/P3_qPCR_primer_settings.p3",p3InsituSetFile="/n/rinn_data1/users/lgoff/utils/primer_design/P3_insitu_probe_settings.p3",verbose=False,keepTmp=False): + """Runs primer3_core to design qPCR, cloning, and in situ primers from a FASTA file. + + Creates three Boulder-IO input files from the FASTA sequences and launches + three parallel primer3_core processes (one per primer type), each with its + own settings file. Waits for all processes to complete before returning. + + Args: + fastaFile: Path to a FASTA file of sequences to design primers for. + Sequences shorter than clonePrimerSteps[-1] + PRIMER_MAX_SIZE + bases are skipped for cloning design. + p3CloneSetFile: Path to a primer3 settings file for cloning primers. + p3PCRSetFile: Path to a primer3 settings file for qPCR primers. + p3InsituSetFile: Path to a primer3 settings file for in situ primers. + verbose: If True, write progress messages to stderr (default: False). + keepTmp: If True, retain the temporary Boulder-IO input files after + the run (default: False). + + Returns: + A tuple of three strings: (qPCR_output_path, cloning_output_path, + insitu_output_path) giving the paths to the primer3 output files. + """ baseName = fastaFile.rstrip(".fa") iter = sequencelib.FastaIterator(open(fastaFile,'r')) cloneTmpFname = baseName+"_clone.p3in" @@ -97,17 +128,44 @@ def runPrimer3(fastaFile,p3CloneSetFile="/n/rinn_data1/users/lgoff/utils/primer_ return (baseName+"_qPCR.p3out",baseName+"_cloning.p3out",baseName+"_insitu.p3out") def test(): + """Smoke test for runPrimer3 using a hard-coded FASTA file. + + Calls runPrimer3 on 'lincSFPQ.fa' and returns nothing. 
Intended for + interactive testing only. + """ fastaFile="lincSFPQ.fa" qPCR,cloning = runPrimer3(fastaFile) return def parsePrimer3(p3OutFile): + """Yields parsed primer3 Record objects from a primer3 output file. + + Opens the specified output file and delegates parsing to primer3lib.parse, + yielding one Record object per sequence entry. + + Args: + p3OutFile: Path to a primer3 output file (Boulder-IO format). + + Yields: + primer3lib.Record objects, each containing the sequenceID, template + sequence, and a list of Primer objects. + """ handle = open(p3OutFile,'r') iter = primer3lib.parse(handle) for record in iter: yield record def printqPCR(p3outFile,outHandle): + """Writes qPCR primer results in human-readable format. + + Parses primer3 output and writes a formatted, multi-line report of qPCR + primer pairs grouped by sequence ID. If no acceptable primers were found + for a sequence, a placeholder message is printed. + + Args: + p3outFile: Path to a primer3 qPCR output file. + outHandle: Writable file-like object to receive the formatted output. + """ recordIter = parsePrimer3(p3outFile) print("######################\n# qPCR Primers\n######################", file=outHandle) for record in recordIter: @@ -129,6 +187,18 @@ def printqPCR(p3outFile,outHandle): print("--------------------------------", file=outHandle) def printqPCRTabDelim(p3outFile,outHandle): + """Writes qPCR primer results in tab-delimited format. + + Parses primer3 output and writes one line per primer pair with columns: + sequenceID, primer type ('qPCR'), primer number, product size, forward + sequence, forward start, forward length, forward Tm, forward GC, reverse + sequence, reverse start, reverse length, reverse Tm, reverse GC. + + Args: + p3outFile: Path to a primer3 qPCR output file. + outHandle: Writable file-like object to receive the tab-delimited + output. 
+ """ recordIter = parsePrimer3(p3outFile) #print >>outHandle, "######################\n# qPCR Primers\n######################" for record in recordIter: @@ -145,6 +215,19 @@ def printqPCRTabDelim(p3outFile,outHandle): def printCloning(p3outFile,outHandle,gateway=False): + """Writes cloning primer results in human-readable format. + + Parses primer3 output and writes a formatted, multi-line report of + cloning primer pairs grouped by sequence ID. When gateway is True, + Gateway attB sequences are prepended to the forward and reverse primers + and 'Gateway' is noted in the output. + + Args: + p3outFile: Path to a primer3 cloning output file. + outHandle: Writable file-like object to receive the formatted output. + gateway: If True, prepend attF to forward and attR to reverse primers + for Gateway cloning (default: False). + """ recordIter = parsePrimer3(p3outFile) print("\n######################\n# Cloning Primers\n######################", file=outHandle) for record in recordIter: @@ -170,6 +253,21 @@ def printCloning(p3outFile,outHandle,gateway=False): print("--------------------------------", file=outHandle) def printCloningTabDelim(p3outFile,outHandle,gateway=False): + """Writes cloning primer results in tab-delimited format. + + Parses primer3 output and writes one line per primer pair with columns: + sequenceID, primer type ('Cloning'), primer number, product size, forward + sequence, forward start, forward length, forward Tm, forward GC, reverse + sequence, reverse start, reverse length, reverse Tm, reverse GC. When + gateway is True, attB sequences are prepended to the primer sequences. + + Args: + p3outFile: Path to a primer3 cloning output file. + outHandle: Writable file-like object to receive the tab-delimited + output. + gateway: If True, prepend attF to forward and attR to reverse primers + (default: False). 
+ """ recordIter = parsePrimer3(p3outFile) #print >>outHandle, "\n######################\n# Cloning Primers\n######################" for record in recordIter: @@ -190,6 +288,15 @@ def printCloningTabDelim(p3outFile,outHandle,gateway=False): print(outStr, file=outHandle) def printInsitu(p3outFile,outHandle): + """Writes in situ hybridisation primer results in human-readable format. + + Parses primer3 output and writes a formatted, multi-line report of in situ + probe primer pairs grouped by sequence ID. + + Args: + p3outFile: Path to a primer3 in situ output file. + outHandle: Writable file-like object to receive the formatted output. + """ recordIter = parsePrimer3(p3outFile) print("######################\n# InSitu Primers\n######################", file=outHandle) for record in recordIter: @@ -211,6 +318,18 @@ def printInsitu(p3outFile,outHandle): print("--------------------------------", file=outHandle) def printInsituTabDelim(p3outFile,outHandle): + """Writes in situ hybridisation primer results in tab-delimited format. + + Parses primer3 output and writes one line per primer pair with columns: + sequenceID, primer type ('InSitu'), primer number, product size, forward + sequence, forward start, forward length, forward Tm, forward GC, reverse + sequence, reverse start, reverse length, reverse Tm, reverse GC. + + Args: + p3outFile: Path to a primer3 in situ output file. + outHandle: Writable file-like object to receive the tab-delimited + output. + """ recordIter = parsePrimer3(p3outFile) #print >>outHandle, "######################\n# qPCR Primers\n######################" for record in recordIter: @@ -226,6 +345,17 @@ def printInsituTabDelim(p3outFile,outHandle): print(outStr, file=outHandle) def printInsitu(p3outFile,outHandle): + """Writes in situ hybridisation primer results in human-readable format (second definition). + + Duplicate of the earlier printInsitu definition; this version is the one + that Python will actually use at runtime. 
Parses primer3 output and writes + a formatted, multi-line report of in situ probe primer pairs grouped by + sequence ID. + + Args: + p3outFile: Path to a primer3 in situ output file. + outHandle: Writable file-like object to receive the formatted output. + """ recordIter = parsePrimer3(p3outFile) print("######################\n# InSitu Primers\n######################", file=outHandle) for record in recordIter: @@ -247,6 +377,19 @@ def printInsitu(p3outFile,outHandle): print("--------------------------------", file=outHandle) def printInsituTabDelim(p3outFile,outHandle): + """Writes ASO / in situ primer results in tab-delimited format (second definition). + + Duplicate of the earlier printInsituTabDelim definition; this version + overrides the first at runtime. Parses primer3 output for in situ / + ASO candidates and writes one tab-delimited line per primer pair with + an 'InSitu' type column. When no candidates are found, writes an 'ASO' + type placeholder line. + + Args: + p3outFile: Path to a primer3 output file. + outHandle: Writable file-like object to receive the tab-delimited + output. + """ recordIter = parsePrimer3(p3outFile) #print >>outHandle, "######################\n# ASO Candidates\n######################" for record in recordIter: @@ -262,6 +405,19 @@ def printInsituTabDelim(p3outFile,outHandle): print(outStr, file=outHandle) def main(argv=None): + """Command-line entry point for the lincRNA primer design pipeline. + + Parses command-line options, runs primer3 via runPrimer3, and writes + formatted primer output (human-readable or tab-delimited) to the output + file. Cleans up temporary primer3 output files unless keepTmp is set. + + Args: + argv: List of command-line argument strings. Defaults to sys.argv + when None. + + Raises: + SystemExit: On usage errors or when --help is requested. 
+ """ if argv is None: argv = sys.argv task = 'qpcr' diff --git a/src/seqlib/lincName.py b/src/seqlib/lincName.py index 8274798..c1fc87c 100644 --- a/src/seqlib/lincName.py +++ b/src/seqlib/lincName.py @@ -1,9 +1,22 @@ #!/usr/bin/env python -''' -Created on Aug 27, 2010 +"""Assigns systematic names to lincRNA loci based on proximity to RefSeq genes. -@author: lgoff -''' +Implements the naming scheme described in Guttman et al. for long intergenic +non-coding RNA (lincRNA) loci: + +- If the 5' end of a lincRNA overlaps the 5' end of a protein-coding gene on + the opposite strand by less than the overlap threshold, the lincRNA is named + 'linc--BP' (bidirectional promoter). +- If a lincRNA overlaps any protein-coding gene on the opposite strand without + satisfying the bidirectional criterion, it is named 'linc--AS' + (antisense). +- Otherwise, the lincRNA is named after the nearest downstream protein-coding + gene on the same strand: 'linc-' (single lincRNA) or + 'linc--' (multiple lincRNAs near the same gene). + +Requires GTFlib, dbConn, and intervallib packages, and a connection to the +UCSC genome browser MySQL server. +""" ############ #Imports @@ -45,7 +58,17 @@ #Classes ############ class Usage(Exception): + """Exception raised for command-line usage errors in lincName. + + Attributes: + msg: Human-readable explanation of the error or the help message. + """ def __init__(self, msg): + """Initialises a Usage exception. + + Args: + msg: Human-readable error or help text. + """ self.msg = msg @@ -54,7 +77,29 @@ def __init__(self, msg): ############ def test5PrimeOverlap(lincInt,geneInt): - """May need to validate this. I'm not sure this works when a lincRNA completely covers a PC gene on the opposite strand""" + """Determines whether the overlap between a lincRNA and a gene is at the lincRNA 5' end. 
+ + Tests whether a lincRNA interval overlaps a protein-coding gene such that + the overlap is at the 5' end of the lincRNA (and also involves the 5' end + of the gene on the opposite strand). Used to identify bidirectional + promoter pairs. + + Note: may not give correct results when a lincRNA completely spans a + protein-coding gene on the opposite strand. + + Args: + lincInt: An interval object for the lincRNA with strand, start, and + end attributes. + geneInt: An interval object for the overlapping protein-coding gene + with strand, start, and end attributes. + + Returns: + True if the overlap is at the 5' end of lincInt; False otherwise. + + Raises: + AssertionError: If the two intervals do not overlap. + ValueError: If the strand of lincInt cannot be determined. + """ assert lincInt.overlaps(geneInt) if lincInt.strand == "+": if lincInt.start <= geneInt.end and lincInt.end > geneInt.end: @@ -70,6 +115,22 @@ def test5PrimeOverlap(lincInt,geneInt): raise ValueError("Could not determine") def bpOverlap(lincInt,geneInt): + """Returns the number of base pairs of overlap between two genomic intervals. + + Sorts the four boundary coordinates and computes the inner distance as the + length of the shared region. + + Args: + lincInt: An interval object with start and end attributes. + geneInt: An interval object with start and end attributes that must + overlap with lincInt. + + Returns: + Integer number of overlapping base pairs between the two intervals. + + Raises: + AssertionError: If the two intervals do not overlap. + """ assert lincInt.overlaps(geneInt), "%s and %s do not overlap" % (lincInt.name,geneInt.name) bounds = [lincInt.start,lincInt.end,geneInt.start,geneInt.end] bounds.sort() @@ -78,6 +139,13 @@ def bpOverlap(lincInt,geneInt): return overlap def printLincs(handle,lincs): + """Writes a collection of lincRNA GTF records to a file handle. + + Args: + handle: Writable file-like object to receive the GTF output. 
+ lincs: Iterable of lincRNA objects, each exposing a getGTF() method + that returns a GTF-formatted string. + """ for linc in lincs: print(linc.getGTF(), end=' ', file=handle) @@ -86,6 +154,23 @@ def printLincs(handle,lincs): ############ def main(gtfFile,genome='hg19'): + """Assigns systematic names to all lincRNA loci in a GTF file. + + Reads lincRNA transcript models from gtfFile, retrieves protein-coding + RefSeq transcripts for the specified genome build, and applies the + bidirectional promoter, antisense, and proximity naming rules to produce + a set of named lincRNA objects. + + Args: + gtfFile: Path to a GTF file of unannotated lincRNA loci (as produced + by Cufflinks or similar assemblers). + genome: UCSC genome build identifier used to fetch RefSeq transcripts + (default: 'hg19'). + + Returns: + A set of lincRNA gene objects with updated name attributes following + the systematic naming convention. + """ #Parse GTF File for lincs lincIter = GTFlib.GTFGeneIterator(gtfFile,verbose=verbose) @@ -198,6 +283,12 @@ def main(gtfFile,genome='hg19'): #Tests ############ def test(): + """Runs a full naming test using hardcoded Broad Institute file paths. + + Calls main() on a hard-coded lincRNA GTF file, writes the named output + to a companion file, and prints a completion message to stderr. Intended + for interactive/development testing only. + """ fname = '/seq/rinnscratch/cole/ftp/assemblies/linc_catalog.gtf' outHandle = open('/seq/rinnscratch/cole/ftp/assemblies/linc_catalog_named.gtf','w') verbose=True diff --git a/src/seqlib/lincRNAs.py b/src/seqlib/lincRNAs.py index 84d58ad..0ff4d20 100644 --- a/src/seqlib/lincRNAs.py +++ b/src/seqlib/lincRNAs.py @@ -1,8 +1,10 @@ -''' -Created on Jun 3, 2010 +"""Utilities for processing lincRNA (long intergenic non-coding RNA) transcript models. 
-@author: lgoff -''' +Processes BED-format lincRNA annotations to fetch spliced sequences, insert +records into a MySQL database, generate transcript model PNG plots, and export +sequences to FASTA format. Requires a local MySQL instance at the Broad +Institute and the intervallib package. +""" import os import sys @@ -13,7 +15,18 @@ def main(bedFile,lincLotID): - + """Processes a BED file of lincRNA models and inserts them into the database. + + For each transcript in the BED file, fetches its spliced sequence, + creates a PNG transcript model plot, and bulk-inserts all records into the + lgoff_nextgen MySQL database using mysqlimport. + + Args: + bedFile: Path to a BED-format file of lincRNA transcript models. + lincLotID: Integer identifier for the lincRNA lot/batch being + processed; used as a foreign key in the database insert. + """ + #Setup environment if not os.path.exists('transcriptModels'): os.mkdir('transcriptModels') @@ -56,6 +69,19 @@ def main(bedFile,lincLotID): return def drawModelPNG(bedRecord,outDir=os.getcwd(),verbose=False): + """Generates a PNG transcript model image for a single BED record. + + Delegates to the BED record's makePNG method and optionally prints + progress information to stdout. + + Args: + bedRecord: An intervallib BED interval object that exposes a + makePNG(outDir) method and a name attribute. + outDir: Directory path where the PNG file will be written + (default: current working directory). + verbose: If True, print status messages indicating which transcript + model is being drawn (default: False). + """ if verbose: print("Making transcript model plot...") bedRecord.makePNG(outDir) @@ -64,7 +90,19 @@ def drawModelPNG(bedRecord,outDir=os.getcwd(),verbose=False): return def insertRecord(lincRNA,lincLotID): - """Does not work for some reason...""" + """Inserts a single lincRNA transcript record into the database. + + Constructs and executes an INSERT SQL statement for the transcripts table. 
+ The function references a module-level db cursor variable which must be + set before calling. Note: this function is known to be non-functional; + use the bulk mysqlimport approach in main() instead. + + Args: + lincRNA: An intervallib interval object with attributes: name, chr, + start, end, strand, exonLengths, exonOffsets, and splicedSequence. + lincLotID: Integer lot identifier to associate with the transcript + record in the database. + """ cursor = db.cursor() insert="INSERT INTO transcripts VALUES (NULL,'%s','%s','%d','%d','%s','%s','%s','%s','%d');" % (lincRNA.name,lincRNA.chr,lincRNA.start,lincRNA.end,lincRNA.strand,",".join([str(x) for x in lincRNA.exonLengths]),",".join([str(x) for x in lincRNA.exonOffsets]),lincRNA.splicedSequence,int(lincLotID)) @@ -77,6 +115,14 @@ def insertRecord(lincRNA,lincLotID): return def getDb(): + """Opens and returns a connection to the Broad Institute MySQL database. + + Connects to the lgoff_nextgen database on mysql.broadinstitute.org with + a hard-coded user and empty password. + + Returns: + A MySQLdb connection object for the lgoff_nextgen database. + """ host="mysql.broadinstitute.org" user="lgoff" password="" diff --git a/src/seqlib/misc.py b/src/seqlib/misc.py index dae4235..7dffc92 100644 --- a/src/seqlib/misc.py +++ b/src/seqlib/misc.py @@ -1,4 +1,10 @@ #!/usr/bin/python +"""Miscellaneous utility functions for sequence analysis, data structures, and pretty printing. + +Provides tools for nuID encoding/decoding of nucleotide sequences, dictionary sorting, +pretty-printing of nested data structures, ranking/ordering utilities, and basic string +manipulation functions used across the seqlib package. +""" import sys @@ -6,8 +12,28 @@ #pygr tools ############# class Annot: - """Annotation class for pygr data""" + """Annotation class for pygr data. + + A lightweight container for genomic annotation records used with the pygr + genome database library. + + Attributes: + name: Identifier for the annotation (e.g. 
gene name or transcript ID). + chr: Chromosome name (e.g. 'chr1'). + strand: Strand orientation ('+' or '-'). + start: 0-based start coordinate of the annotation. + end: End coordinate of the annotation. + """ def __init__(self,name,chr,strand,start,end): + """Initialises an Annot instance. + + Args: + name: Identifier for the annotation. + chr: Chromosome name. + strand: Strand orientation ('+' or '-'). + start: 0-based start coordinate. + end: End coordinate. + """ self.name=name self.chr=chr self.strand=strand @@ -18,12 +44,44 @@ def __init__(self,name,chr,strand,start,end): #nuID implementation for python ################### def mreplace(s,chararray=['A','C','G','T','U'],newarray=['0','1','2','3','3']): + """Replaces multiple characters in a string using paired replacement arrays. + + Iterates over corresponding pairs from chararray and newarray, replacing + each occurrence of chararray[i] with newarray[i] in sequence. Defaults + map the nucleotide alphabet (A, C, G, T, U) to single-digit codes used + by the nuID encoding scheme. + + Args: + s: Input string to perform replacements on. + chararray: List of characters (or substrings) to replace. + newarray: List of replacement characters (or substrings), paired + positionally with chararray. + + Returns: + The modified string after all replacements have been applied. + """ for a,b in zip(chararray,newarray): s=s.replace(a,b) return s def seq2nuID(seq): - """Converts a string DNA or RNA sequence into its corresponding 'nuID'""" + """Converts a DNA or RNA sequence string into its corresponding nuID. + + The nuID (nucleotide identifier) is a compact, base-64-like encoding of a + nucleotide sequence that encodes both sequence content and a checksum + character. This implementation replaces the standard "_" character in the + code alphabet with "!" to avoid conflicts with SHRiMP alignment output + parsing. + + Args: + seq: A DNA or RNA sequence string (case-insensitive; 'U' is treated + identically to 'T'). 
+ + Returns: + A nuID string whose first character encodes checksum and padding + information and whose remaining characters encode successive triplets + of nucleotides in base-64 space. + """ """ Default code includes "_" as char. This conflicts with parsing for shrimp. So for my specific instance, @@ -55,6 +113,25 @@ def seq2nuID(seq): return id def nuID2seq(nuID): + """Decodes a nuID string back into the original nucleotide sequence. + + Reverses the nuID encoding produced by seq2nuID. The first character of + the nuID encodes checksum and padding length; the remaining characters are + decoded from base-64 triplets back to the ACGT alphabet. This + implementation uses "!" instead of "_" in the code alphabet (matching + seq2nuID) to avoid conflicts with SHRiMP output parsing. + + Args: + nuID: A nuID string as produced by seq2nuID. + + Returns: + The original DNA sequence string (uppercase ACGT). + + Raises: + AssertionError: If the nuID contains the '.' character as a check code + (which would indicate a coding error or invalid nuID), or if the + checksum validation fails. + """ """ Default code includes "_" as char. This conflicts with parsing for shrimp. So for my specific instance, "_" has been replaced with "!" @@ -98,7 +175,16 @@ def sort_by_value(d): return [ backitems[i][1] for i in range(0,len(backitems))] def sbv2(d,reverse=False): - ''' proposed in PEP 265, using the itemgetter ''' + """Returns dictionary items sorted by value, using itemgetter (PEP 265 approach). + + Args: + d: A dictionary to sort. + reverse: Not currently used; items are always sorted in descending + order by value regardless of this parameter. + + Returns: + A list of (key, value) tuples sorted by value in descending order. 
+ """ from operator import itemgetter return sorted(d.items(), key=itemgetter(1), reverse=True) @@ -110,6 +196,17 @@ def sortListofDicts(fieldname): return lambda x: x[fieldname] def sort_dict(d,reverse=True): + """Returns dictionary items sorted first by value then by key. + + Args: + d: A dictionary to sort. + reverse: If True (default), sort in descending order; if False, + sort in ascending order. + + Returns: + A list of (key, value) tuples sorted by (value, key) using the + specified direction. + """ return sorted(d.items(), key=lambda item: (item[1], item[0]), reverse=reverse) ######## @@ -118,6 +215,29 @@ def sort_dict(d,reverse=True): # ######## def pretty_print(f, d, level=-1, maxw=0, maxh=0, gap="", first_gap='', last_gap=''): + """Recursively pretty-prints a nested Python data structure to a file stream. + + Handles lists, tuples, dicts, class instances, and scalar values, printing + each with indentation that reflects the nesting depth. Optionally limits + the depth of recursion, the width of each printed line, and the number of + elements printed per container. + + Args: + f: Output file stream (e.g. sys.stdout or an open file handle). + d: The data structure to print. + level: Maximum recursion depth. -1 (default) means unlimited depth. + 0 means stop recursing and print a repr of the current element. + maxw: Maximum character width for a single printed line. 0 (default) + means no width limit. + maxh: Maximum number of elements to print from any list, tuple, or + dict at any recursion level. 0 (default) means no limit. + gap: Indentation prefix inserted before each element inside a + container. + first_gap: Prefix printed before the opening bracket/brace/paren of + a container, or before a scalar value. + last_gap: Prefix printed before the closing bracket/brace/paren of + a container. 
+ """ # depending on the type of expression, it recurses through its elements # and prints with appropriate indentation @@ -282,7 +402,23 @@ def pretty_print(f, d, level=-1, maxw=0, maxh=0, gap="", first_gap='', last_gap= f.write(first_gap+repr(d)+'\n') def pp(d,level=-1,maxw=0,maxh=0,parsable=0): - """ wrapper around pretty_print that prints to stdout""" + """Pretty-prints a data structure to stdout. + + Wrapper around pretty_print that writes to sys.stdout. When parsable is + set to a truthy value the standard library pprint module is used instead, + which produces output that can be eval'd back to the original structure. + + Args: + d: The data structure to print. + level: Maximum recursion depth passed to pretty_print. -1 means + unlimited. + maxw: Maximum line width passed to pretty_print (or pprint width when + parsable is set). 0 means no limit. + maxh: Maximum container height passed to pretty_print. 0 means no + limit. + parsable: If 0 (default), use pretty_print for human-readable output. + If non-zero, use the standard library pprint module. + """ if not parsable: pretty_print(sys.stdout, d, level, maxw, maxh, '', '', '') else: @@ -292,6 +428,12 @@ def pp(d,level=-1,maxw=0,maxh=0,parsable=0): pp2.pprint(d) def test_pp(): + """Runs a self-contained smoke test of the pp / pretty_print functions. + + Calls pp with a heterogeneous nested data structure containing dicts, + lists, tuples, integers, strings, and a lambda. Output is written to + stdout. No return value. + """ pp({'one': ('two',3,[4,5,6]), 7: (lambda x: 8*9), 'ten': ['ele', {'ven': 12, @@ -320,6 +462,26 @@ def ifab(test, a, b): # #################################### def sfill(s, length, fill_char = '.'): + """Pads a string on the right with a fill character until it reaches the target length. + + Example:: + + sfill('hello', 18, '.') -> 'hello.............' + # <--- 18 chars ---> + + Useful for aligning dictionary keys when pretty-printing: + ``one......: 1``, ``five.....: 5``, ``seventeen: 17``. 
+ + Args: + s: The input string to pad. + length: The desired total length of the returned string. + fill_char: The character used for padding (default: '.'). + + Returns: + The input string right-padded with fill_char to the specified length. + If the input string is already at least as long as length, it is + returned unchanged. + """ # Appends fill_char to the string s until it reaches length length # ex: sfill('hello',18,'.') -> hello............... # <--- 18 chars ---> @@ -336,6 +498,20 @@ def sfill(s, length, fill_char = '.'): return s + fill_char*(length-len(s)) def rstrips(s, suffix): + """Strips a specific suffix from the right end of a string. + + Unlike str.rstrip, this function removes the exact suffix string rather + than a set of characters. + + Args: + s: The input string. + suffix: The exact suffix to remove. If empty or not present at the + end of s, the string is returned unchanged. + + Returns: + The input string with the suffix removed from the right end, or the + original string if the suffix was not found. + """ if suffix and s.endswith(suffix): s = s[:-len(suffix)] return s @@ -459,6 +635,17 @@ def rank(x, NoneIsLast=True, decreasing = False, ties = "first"): return R def uniqify(seq): + """Returns the unique elements of an iterable as a list. + + Not order-preserving: the returned list may appear in arbitrary order + because uniqueness is tracked via a dictionary. + + Args: + seq: An iterable of hashable elements. + + Returns: + A list containing each unique element from seq exactly once. + """ # Not order preserving keys = {} for e in seq: diff --git a/src/seqlib/myDataTypes.py b/src/seqlib/myDataTypes.py index dea6473..8616d72 100644 --- a/src/seqlib/myDataTypes.py +++ b/src/seqlib/myDataTypes.py @@ -1,4 +1,10 @@ ''' +Custom data type implementations for seqlib/RNASeq data processing. 
+ +Provides a Stack (LIFO), a binary search tree (BinaryTree / BinaryNode / +EmptyNode), and a directed-graph (Graph) useful for path-finding in +acyclic graphs. + Created on Dec 14, 2009 My custom data types to help with RNASeq data @@ -9,113 +15,329 @@ class Stack: ''' - Basic 'stack' data type + A last-in, first-out (LIFO) stack data structure backed by a Python list. + + Supports push, pop, and peek operations, and delegates unknown attribute + lookups to the underlying list so list methods are accessible directly. ''' + def __init__(self, start=[]): ''' - Constructor + Initialize the Stack, optionally pre-loading it with items. + + Items from start are pushed in order and then reversed so that the + first element of start ends up at the top of the stack. + + Args: + start: An optional list of items to pre-load. Defaults to []. ''' self.stack = [] for x in start: self.push(x) self.reverse() - - def push(self,obj): + + def push(self, obj): + """Push an item onto the top of the stack. + + Args: + obj: The object to place on top of the stack. + """ self.stack = [obj] + self.stack - + def pop(self): + """Remove and return the item at the top of the stack. + + Returns: + The top item of the stack. + + Raises: + stack2.error: If the stack is empty (underflow). + """ if not self.stack: raise error('underflow') top, self.stack = self.stack[0], self.stack[1:] return top def top(self): + """Return the top item without removing it. + + Returns: + The item currently at the top of the stack. + + Raises: + stack2.error: If the stack is empty (underflow). + """ if not self.stack: raise error('underflow') return self.stack[0] - + def empty(self): + """Return True if the stack contains no items. + + Returns: + True if the stack is empty, False otherwise. 
+ """ return not self.stack - + #Overloads def __repr__(self): + """Return a string representation of the stack.""" return '[Stack:%s]' % self.stack - - def __cmp__(self,other): + + def __cmp__(self, other): + """Compare this stack to another by their underlying lists.""" return cmp(self.stack, other.stack) - + def __len__(self): + """Return the number of items in the stack.""" return len(self.stack) - - def __add__(self,other): + + def __add__(self, other): + """Concatenate two stacks and return a new Stack. + + Args: + other: Another Stack instance to append. + + Returns: + A new Stack containing items from this stack followed by other's. + """ return Stack(self.stack+other.stack) - - def __mul__(self,reps): + + def __mul__(self, reps): + """Repeat the stack contents reps times and return a new Stack. + + Args: + reps: An integer number of times to repeat. + + Returns: + A new Stack with the contents repeated reps times. + """ return Stack(self.stack * reps) - - def __getitem__(self,offset): + + def __getitem__(self, offset): + """Return the item at the given index. + + Args: + offset: An integer index into the underlying list. + + Returns: + The item at position offset. + """ return self.stack[offset] - - def __getslice__(self,low,high): + + def __getslice__(self, low, high): + """Return a new Stack containing the slice from low to high. + + Args: + low: The start index of the slice. + high: The end index of the slice (exclusive). + + Returns: + A new Stack containing the sliced elements. + """ return Stack(self.stack[low:high]) - - def __getattr__(self,name): - return getattr(self.stack,name) + + def __getattr__(self, name): + """Delegate attribute lookup to the underlying list. + + Args: + name: The attribute name to look up on the underlying list. + + Returns: + The attribute from the underlying list. 
+ """ + return getattr(self.stack, name) ################## #Binary Trees ################## class BinaryTree: + """A binary search tree that delegates to recursive BinaryNode/EmptyNode objects. + + Stores values in sorted order and supports O(log n) average-case lookup + and insertion. Duplicate values are silently ignored. + """ + def __init__(self): + """Initialize an empty BinaryTree.""" self.tree = EmptyNode() + def __repr__(self): + """Return a parenthesized string representation of the tree.""" return repr(self.tree) - def lookup(self,value): + + def lookup(self, value): + """Return 1 if value exists in the tree, 0 otherwise. + + Args: + value: The value to search for. + + Returns: + 1 if the value is present, 0 if it is not. + """ return self.tree.lookup(value) - def insert(self,value): + + def insert(self, value): + """Insert value into the tree, maintaining sort order. + + If value already exists in the tree it is not inserted again. + + Args: + value: The value to insert. + """ self.tree = self.tree.insert(value) class EmptyNode: + """Sentinel node representing an empty position in a BinaryTree. + + Acts as the leaf terminator: lookup always fails and insert creates a + new BinaryNode at this position. + """ + def __repr__(self): + """Return '*' to represent an empty node.""" return "*" - def lookup(self,value): #Fail at the bottom + + def lookup(self, value): + """Return 0 because an empty node contains no value. + + Args: + value: The value being searched for (unused). + + Returns: + Always 0 (not found). + """ return 0 - def insert(self,value): - return BinaryNode(self,value,self) #Add new node at bottom + + def insert(self, value): + """Create a new BinaryNode at this position with value. + + Args: + value: The value to store in the new node. + + Returns: + A new BinaryNode with empty left and right children. 
+ """ + return BinaryNode(self, value, self) #Add new node at bottom class BinaryNode: - def __init__(self,left,value,right): - self.data,self.left,self.right = value,left,right - def lookup(self,value): + """An internal node of a binary search tree holding a value and two subtrees. + + Attributes: + data: The value stored at this node. + left: The left child node (values less than data). + right: The right child node (values greater than data). + """ + + def __init__(self, left, value, right): + """Initialize a BinaryNode with left subtree, a value, and right subtree. + + Args: + left: The left child (a BinaryNode or EmptyNode). + value: The value to store at this node. + right: The right child (a BinaryNode or EmptyNode). + """ + self.data, self.left, self.right = value, left, right + + def lookup(self, value): + """Search for value in the subtree rooted at this node. + + Args: + value: The value to search for. + + Returns: + 1 if value is found in this subtree, 0 otherwise. + """ if self.data == value: return 1 - elif self.data>value: + elif self.data > value: return self.left.lookup(value) else: return self.right.lookup(value) - def insert(self,value): + + def insert(self, value): + """Insert value into the subtree rooted at this node. + + Traverses left if value is less than this node's data, right if + greater. Equal values are ignored (no duplicates stored). + + Args: + value: The value to insert. + + Returns: + This node (possibly with an updated child subtree). + """ if self.data > value: self.left = self.left.insert(value) elif self.data < value: self.right = self.right.insert(value) return self + def __repr__(self): + """Return a parenthesized inorder string representation of this subtree.""" return '( %s, %s, %s )' % (repr(self.left), repr(self.data), repr(self.right)) ################ #Directed Acyclic Graphs ################ class Graph: - def __init__(self,label,extra=None): + """A node in a directed acyclic graph (DAG) that supports path-finding. 
+ + Each Graph node has a label, optional data payload, and a list of + outgoing edges to other Graph nodes. Multiple paths between nodes are + found via depth-first search and stored as class-level state in + Graph.solns. + + Attributes: + name: A string label identifying this node. + data: An optional data payload associated with this node. + edges: A list of Graph nodes reachable from this node. + """ + + def __init__(self, label, extra=None): + """Initialize a Graph node with a label and optional data. + + Args: + label: A string name for this node. + extra: An optional data object to associate with the node. + Defaults to None. + """ self.name = label self.data = extra self.edges = [] + def __repr__(self): + """Return the node's label as its string representation.""" return self.name - def search(self,goal): + + def search(self, goal): + """Find all acyclic paths from this node to goal. + + Resets Graph.solns, performs a depth-first search via generate(), + and sorts found paths by length (shortest first). + + Args: + goal: A Graph node to search for. + + Returns: + A list of paths (each path is a list of Graph nodes) from this + node to goal, sorted by path length ascending. + """ Graph.solns = [] - self.generate([self],goal) - Graph.solns.sort(lambda x,y: cmp(len(x), len(y))) + self.generate([self], goal) + Graph.solns.sort(lambda x, y: cmp(len(x), len(y))) return Graph.solns - def generate(self,path,goal): + + def generate(self, path, goal): + """Recursively explore paths from this node towards goal. + + Appends the current path to Graph.solns when goal is reached. + Avoids cycles by checking whether each neighbor is already in the + current path before recursing. + + Args: + path: A list of Graph nodes representing the current path from + the search origin to this node. + goal: A Graph node to find. 
+ """ if self == goal: Graph.solns.append(path) else: diff --git a/src/seqlib/mySam.py b/src/seqlib/mySam.py index 341d89f..570abd9 100644 --- a/src/seqlib/mySam.py +++ b/src/seqlib/mySam.py @@ -1,6 +1,12 @@ ''' +Miscellaneous tools to get information from a SAM/BAM file. + +Provides utilities for parsing SAM/BAM alignment files, computing read +pileups, fetching strand-specific coverage arrays, and plotting read +density across genomic intervals. Built on top of pysam. + Created on Oct 25, 2009 -Misc tools to get information from a SAM/BAM file... + @author: lgoff ''' import array @@ -17,20 +23,65 @@ # from inOut.wiggle import WiggleFileWriter # NOTE: inOut.wiggle module not available; WiggleFileWriter commented out class SAMAlignment(Alignment): - """Basic object for SAMstring (extends Alignment class)""" + """Basic object representing a single SAM alignment record. + + Extends the Alignment base class with SAM-specific fields for the + CIGAR string and base-quality string. + + Attributes: + qual: Base-quality string from SAM field 11. + cigar: CIGAR string from SAM field 6 describing the alignment. + """ + def __init__(self,readname,chr,start,end,strand,score,readcount,readsequence,cigar,qualstring): + """Initialises a SAMAlignment. + + Args: + readname: Query template name (SAM field 1). + chr: Reference sequence name / chromosome (SAM field 3). + start: 1-based leftmost mapping position (SAM field 4). + end: Computed end position (start + read length - 1). + strand: Strand of the alignment, one of '+' or '-'. + score: Mapping quality score (SAM field 5). + readcount: Number of reads represented by this alignment + (typically 1 for a single record). + readsequence: Read sequence bases (SAM field 10). + cigar: CIGAR string describing alignment operations (SAM field 6). + qualstring: ASCII-encoded base-quality string (SAM field 11). 
+ """ Alignment.__init__(self,readname,chr,start,end,strand,score=readcount,readcount = readcount,readsequence=readsequence) self.qual = qualstring self.cigar = cigar def SAMReader(fname): - """Iterator for SAMAlignment records (depricated, use pysam)""" + """Iterate over SAM alignment records from a file. + + Deprecated — use pysam directly for new code. + + Args: + fname: Path to the SAM file. + + Yields: + An Interval object for each alignment record in the file. + """ handle = open(fname,'r') for line in handle: aln = parseSAMString(line) yield aln.toInterval() def parseSAMString(samstring): + """Parse a single SAM-format line into a SAMAlignment object. + + Reads are assumed to be non-paired and non-spliced; the end position is + derived from the start position plus the read-sequence length. + + Args: + samstring: A single tab-delimited SAM record line (no trailing + newline required — it is stripped internally). + + Returns: + A SAMAlignment instance populated from the SAM fields. + """ tokens = samstring.rstrip().split("\t") readname = tokens[0] chr = tokens[2] @@ -45,7 +96,22 @@ def parseSAMString(samstring): return SAMAlignment(readname,chr,start,end,strand,score,readcount,readsequence,cigar,qualstring) def pileup2wig(fname,shortname,outDir=os.getcwd()+"/"): - """Don't use this...it's lazy and it doesn't feel right""" + """Convert a samtools pileup file to strand-specific wiggle files. + + Reads a samtools pileup output file and writes two variableStep wiggle + files: one for the plus strand (forward reads, indicated by '.') and one + for the minus strand (reverse reads, indicated by ','). This + implementation is noted as incomplete / not recommended for production + use. + + Args: + fname: Path to the samtools pileup file to read. + shortname: Base name used for both the wiggle track labels and the + output file names (``_plus.wig`` and + ``_minus.wig``). + outDir: Directory in which the output wiggle files are written. 
+ Defaults to the current working directory. + """ handle = open(fname,'r') preRef = '' prePos = -1 @@ -56,6 +122,16 @@ def pileup2wig(fname,shortname,outDir=os.getcwd()+"/"): minusHand = open(outDir+shortname+"_minus.wig",'w') def wigHeader(shortname,strand): + """Build a UCSC wiggle track-definition header line. + + Args: + shortname: Base name used in the track name and description fields. + strand: Strand of the track, either '+' (blue) or '-' (red). + + Returns: + A wiggle track header string suitable for writing as the first + line of a wiggle file. + """ if strand=="+": color = '0,0,255' sName = 'plus' @@ -84,17 +160,49 @@ def wigHeader(shortname,strand): minusHand.close() class Counter: - """Use in callback function to store read counts within an alignment (includes those that - are not completely contained within the alignment""" + """Callable that accumulates a total read count for use as a pysam callback. + + Designed to be passed as a callback to pysam fetch/pileup methods. + Counts all reads that overlap the queried region, including those not + completely contained within it. + + Attributes: + mCounts: Running total of reads seen so far. + """ + mCounts = 0 + def __call__(self,alignment): + """Increment the read counter by one for each alignment seen. + + Args: + alignment: A pysam AlignedSegment (or compatible) object. + The alignment itself is not inspected; only its presence + increments the counter. + """ self.mCounts += 1 class StrandCounter: - """Provides a strand-specific number of reads as opposed to total read density""" + """Callable that accumulates strand-specific read counts for use as a pysam callback. + + Separates reads into forward (plus) and reverse (minus) strand tallies + rather than combining them into a single total. + + Attributes: + plusCount: Running total of forward-strand reads seen. + minusCount: Running total of reverse-strand reads seen. 
+ """ + plusCount = 0 minusCount = 0 + def __call__(self,alignment): + """Increment the appropriate strand counter for each alignment seen. + + Args: + alignment: A pysam AlignedSegment (or compatible) object. + Strand is determined from the ``is_reverse`` flag. + """ if alignment.is_reverse: self.minusCount += 1 else: @@ -102,14 +210,34 @@ def __call__(self,alignment): def getBitValue(n, p): - ''' - get the bitvalue of denary (base 10) number n at the equivalent binary - position p (binary count starts at position 0 from the right) - ''' + """Return the bit at position p of integer n. + + Extracts the single bit at binary position p (zero-indexed from the + least-significant bit) of the denary integer n. + + Args: + n: A non-negative integer whose bit is to be inspected. + p: Zero-based bit position (0 = least-significant / rightmost bit). + + Returns: + 1 if the bit at position p is set, 0 otherwise. + """ return (n >> p) & 1 def strandFlag(flag): - """Returns strand of sequence from SAM record bitflag (field 4)""" + """Determine the alignment strand from a SAM bitflag value. + + Inspects bit 4 (0x10) of the SAM FLAG field to determine whether the + read mapped to the reverse strand. + + Args: + flag: The integer SAM FLAG value (field 2), or a string + representation of it. + + Returns: + '+' if bit 4 is 0 (forward strand), '-' if bit 4 is 1 (reverse + strand), or '*' for any other value. + """ flag = int(flag) if getBitValue(flag,4)==0: return "+" @@ -119,11 +247,24 @@ def strandFlag(flag): return "*" def makeCigar(): + """Placeholder for CIGAR string construction. + + Not yet implemented. + """ pass def samScanByStrand(samFetch,strand): - """Generator to iterate over a samFetch using only one of the strands. - strand should be one of ["+","-"] + """Yield only reads that map to the specified strand from a pysam fetch iterator. + + Args: + samFetch: An iterable of pysam AlignedSegment objects, typically + returned by ``pysam.AlignmentFile.fetch()``. 
+ strand: The strand to retain. Must be '+' (forward, non-reverse + reads) or '-' (reverse reads). + + Yields: + pysam AlignedSegment objects whose strand matches the requested + strand value. """ for read in samFetch: if strand == "+": @@ -138,6 +279,19 @@ def samScanByStrand(samFetch,strand): continue def sam2Interval(samRead): + """Convert a pysam AlignedSegment to an intervallib Interval object. + + The interval uses 1-based coordinates (pysam's 0-based ``pos`` is + incremented by 1) and a readcount of 1. + + Args: + samRead: A pysam AlignedSegment object with valid ``rname``, + ``pos``, ``seq``, and ``is_reverse`` attributes. + + Returns: + An intervallib.Interval representing the read's mapped region, + with strand set to '+' or '-' according to ``samRead.is_reverse``. + """ if samRead.is_reverse: strand = "-" else: @@ -146,7 +300,26 @@ def sam2Interval(samRead): def samReadsIntersect(a,b,useStrand = True,offset=0): - """Checks to see if two samReads (a,b) intersect""" + """Determine whether two pysam AlignedSegment reads overlap each other. + + Two reads are considered to intersect if their mapped positions overlap + (allowing for an optional extension by ``offset`` bases). When + ``useStrand`` is True, reads on different strands or different reference + sequences are never considered to intersect. + + Args: + a: A pysam AlignedSegment object. + b: A pysam AlignedSegment object to compare against ``a``. + useStrand: If True (default), reads must be on the same reference + sequence and the same strand (``is_reverse`` must match) to + be considered intersecting. + offset: Number of extra bases by which each read's length is + extended before testing for overlap. Defaults to 0. + + Returns: + True if reads a and b overlap (subject to strand and offset rules), + False otherwise. 
+ """ if useStrand: if a.rname == b.rname and a.is_reverse == b.is_reverse: return not(a.pos>b.pos+len(b.seq)+offset or b.pos>a.pos+len(a.seq)+offset) @@ -178,6 +351,22 @@ def makeContiguousIntervals2(samHandle,start='start',end='end',offset=0,useStran currentInterval = sam2Interval(current) """ def makeContiguousIntervalsByStrand(samHandle,offset=0): + """Generate contiguous genomic intervals from a sorted BAM file, separately per strand. + + Iterates over all reads in the BAM file and merges overlapping reads + (with optional extension by ``offset``) into contiguous intervals. + Processing is performed independently for the forward ('+') and reverse + ('-') strands. + + Args: + samHandle: An open pysam AlignmentFile object (must be sorted). + offset: Number of bases by which read extents are extended when + testing for overlap. Defaults to 0. + + Yields: + intervallib.Interval objects representing contiguous merged regions, + with ``readcount`` reflecting the number of constituent reads. + """ for strand in ["+","-"]: samFetch = samScanByStrand(samHandle.fetch(),strand) current = next(samFetch) @@ -201,9 +390,41 @@ def generate_pileup_chunks(read_iterator, dtype=numpy.uint32, max_rlen=2048, chunk_size=8192): - ''' - don't use this function with RNA-seq data because it does not pileup spliced reads properly - ''' + """Generate read-pileup data in contiguous chunks across a genomic region. + + Iterates over a sorted stream of reads and accumulates per-base read + depth in fixed-size chunks, yielding each chunk as it is complete. + Reverse-strand reads may optionally be shifted upstream so that their + 5' end corresponds to the inferred fragment start. + + Note: Do not use with RNA-seq data — spliced reads are not handled + correctly. + + Args: + read_iterator: An iterable of pysam AlignedSegment objects sorted + by position. + start: 0-based start of the region to pileup. + end: 0-based (exclusive) end of the region to pileup. 
+ unique_only: If True (default), reads flagged as PCR/optical + duplicates (``is_duplicate``) are skipped. + merge_strands: If True, reverse-strand reads are shifted left by + ``(read_length - fragment_length)`` bases so both strands + contribute to the same inferred fragment positions. + fragment_length: Expected DNA fragment length used to extend reads. + A value <= 0 means use the actual read length unchanged. + dtype: numpy dtype for the internal accumulation array. + Defaults to numpy.uint32. + max_rlen: Maximum anticipated read length in bases. The internal + buffer is sized to accommodate this. Defaults to 2048. + chunk_size: Number of bases covered by each yielded chunk. + Must be >= max_rlen. Defaults to 8192. + + Yields: + Tuples of (chunk_start, chunk_end, chunk_array) where chunk_start + and chunk_end are offsets relative to ``start``, and chunk_array is + a numpy array of length (chunk_end - chunk_start) containing the + per-base read depth. + """ assert chunk_size >= max_rlen assert end > start # figure out the boundaries of the first chunk @@ -281,6 +502,26 @@ def bam_to_wiggle(inbamfile, wigfile, merge_strands=False, fragment_length=-1, norm=False): + """Convert a BAM file to a compressed wiggle file. + + Computes per-base read depth across every reference sequence in the BAM + file and writes the result as a wiggle file using WiggleFileWriter (from + the inOut.wiggle module). Note: WiggleFileWriter is currently + unavailable — calling this function will raise a NameError. + + Args: + inbamfile: Path to the input BAM file (must be sorted and indexed). + wigfile: Path to the output wiggle file to write. + unique_only: If True, reads flagged as PCR/optical duplicates are + excluded from the pileup. Defaults to False. + merge_strands: If True, reverse-strand reads are shifted upstream + so both strands reflect inferred fragment start positions. + Defaults to False. + fragment_length: Expected DNA fragment length used to extend reads. 
+ A value <= 0 means use the actual read length unchanged. + norm: If True, read depths are normalised to reads-per-kilobase per + million mapped reads (RPKM-style). Defaults to False. + """ #logger = logging.getLogger(__name__) bamfile = pysam.AlignmentFile(inbamfile, 'rb') @@ -334,7 +575,28 @@ def bam_to_wiggle(inbamfile, wigfile, bamfile.close() def bamFetchFlank(bamHandle,chr,pos,flankSize=1000,fragment_length=200): - """This does not work with gapped alignments""" + """Compute merged-strand read-depth in a window centred on a genomic position. + + Fetches reads from a BAM file within ``pos ± (flankSize + fragment_length)`` + and accumulates per-base coverage into a numpy array. Reverse-strand + reads are shifted upstream to align with their inferred fragment start. + + Note: Does not handle gapped (spliced) alignments correctly. + + Args: + bamHandle: An open pysam AlignmentFile object. + chr: Reference sequence name / chromosome to query. + pos: Centre position (0-based) of the window. + flankSize: Number of bases to include on each side of ``pos`` in the + returned array. Defaults to 1000. + fragment_length: Expected DNA fragment length used to extend reverse- + strand reads. A value <= 0 means use the actual read length. + Defaults to 200. + + Returns: + A numpy array of length ``2 * flankSize + 1`` containing the + per-base read depth centred on ``pos``. + """ #Create container to hold pos +- (flankSize+fragment_length) arr = numpy.zeros(2*(flankSize+fragment_length)+1) range = (pos-flankSize-fragment_length,pos+flankSize+fragment_length) @@ -358,7 +620,32 @@ def bamFetchFlank(bamHandle,chr,pos,flankSize=1000,fragment_length=200): return arr[fragment_length:fragment_length+2*flankSize+1] def bamFetchFlank_byStrand(bamHandle,chr,pos,flankSize=1000,fragment_length=200,span=1): - """This does not work with gapped alignments""" + """Compute strand-specific read-depth arrays in a window centred on a genomic position. 
+ + Similar to ``bamFetchFlank`` but returns separate arrays for the sense + (forward) and antisense (reverse) strands. Reverse-strand reads are + extended to the inferred fragment start when ``fragment_length`` exceeds + the read length. + + Note: Does not handle gapped (spliced) alignments correctly. + + Args: + bamHandle: An open pysam AlignmentFile object. + chr: Reference sequence name / chromosome to query. + pos: Centre position (0-based) of the window. + flankSize: Number of bases to include on each side of ``pos`` in + each returned array. Defaults to 1000. + fragment_length: Expected DNA fragment length used to extend reverse- + strand reads. A value <= 0 means use the actual read length. + Defaults to 200. + span: Step size for down-sampling the output arrays. A value of 1 + (default) returns every base; 2 returns every other base, etc. + + Returns: + A tuple (senseArr, antisenseArr) where each element is a numpy + array of length ``(2 * flankSize + 1) / span`` containing per-base + read depth for the respective strand, centred on ``pos``. + """ senseArr = numpy.zeros(2*(flankSize+fragment_length)+1) antisenseArr = numpy.zeros(2*(flankSize+fragment_length)+1) @@ -386,7 +673,30 @@ def bamFetchFlank_byStrand(bamHandle,chr,pos,flankSize=1000,fragment_length=200, return (senseArr[fragment_length:fragment_length+2*flankSize+1:span],antisenseArr[fragment_length:fragment_length+2*flankSize+1:span]) def bamFetchInterval(bamHandle,chr,start,end,fragment_length=200,span=1): - """This does not work with gapped alignments""" + """Compute strand-specific read-depth arrays across a genomic interval. + + Fetches reads from the BAM file that overlap ``[start, end]`` and + accumulates per-base read depth separately for the sense and antisense + strands. Reverse-strand reads whose actual length is less than + ``fragment_length`` are extended upstream to the inferred fragment start. + + Note: Does not handle gapped (spliced) alignments correctly. 
+ + Args: + bamHandle: An open pysam AlignmentFile object. + chr: Reference sequence name / chromosome to query. + start: 0-based start of the interval. + end: 0-based end of the interval (inclusive). + fragment_length: Expected DNA fragment length used to extend reads. + A value <= 0 means use the actual read length unchanged. + Defaults to 200. + span: Step size for down-sampling the output arrays. Defaults to 1. + + Returns: + A tuple (senseArr, antisenseArr) where each element is a numpy + array of length ``(end - start + 1) / span`` containing per-base + read depth for the respective strand across the interval. + """ senseArr = numpy.zeros(end-start+(2*fragment_length)+1) antisenseArr = numpy.zeros(end-start+(2*fragment_length)+1) @@ -415,6 +725,24 @@ def bamFetchInterval(bamHandle,chr,start,end,fragment_length=200,span=1): return(senseArr[fragment_length:fragment_length+intervalSize:span],antisenseArr[fragment_length:fragment_length+intervalSize:span]) def makeCigarMask(cigar,increment=1): + """Build a per-base mask vector from a CIGAR string. + + Parses a text CIGAR string and produces a flat list where each element + corresponds to one reference base consumed by the alignment. 'M' + (match/mismatch) operations contribute ``increment`` to each position; + 'N' (intron/skip) operations contribute 0. Other CIGAR operations that + do not consume reference bases (e.g. 'I', 'S', 'H', 'P') are omitted + from the output. + + Args: + cigar: A CIGAR string such as ``'36M'`` or ``'20M1000N16M'``. + increment: Value assigned to each matched ('M') reference base in + the output mask. Defaults to 1. + + Returns: + A list of numeric values (each 0 or ``increment``) with one entry + per reference base consumed by the alignment. + """ incrementTable = { 'M':increment, 'N':0 @@ -440,6 +768,25 @@ def makeCigarMask(cigar,increment=1): return cigarMask def makePysamCigarMask(cigarTuple,increment=1): + """Build a per-base mask vector from a pysam CIGAR tuple. 
+ + Equivalent to ``makeCigarMask`` but accepts the pysam representation + of a CIGAR string (a list of (operation_code, length) integer pairs) + rather than a text CIGAR string. 'M' operations contribute + ``increment``; 'N' operations contribute 0; other operations that do + not consume reference bases are omitted. + + Args: + cigarTuple: A sequence of (operation, length) pairs as returned by + pysam's ``AlignedSegment.cigar`` attribute. Operation codes + follow the SAM spec order: 0=M, 1=I, 2=D, 3=N, 4=S, 5=H, 6=P. + increment: Value assigned to each matched ('M') reference base. + Defaults to 1. + + Returns: + A list of numeric values (each 0 or ``increment``) with one entry + per reference base consumed by the alignment. + """ lookupTable = ['M','I','D','N','S','H','P'] incrementTable = { 'M':increment, @@ -454,6 +801,25 @@ def makePysamCigarMask(cigarTuple,increment=1): return cigarMask def bamFetchGappedInterval(bamHandle,chr,start,end,span=1): + """Compute strand-specific read-depth arrays across an interval, respecting CIGAR gaps. + + Unlike ``bamFetchInterval``, this function uses each read's CIGAR + information (via ``makePysamCigarMask``) so that intronic regions ('N' + operations) do not contribute to the depth. Fragment-length extension + is not yet implemented (TODO). + + Args: + bamHandle: An open pysam AlignmentFile object. + chr: Reference sequence name / chromosome to query. + start: 0-based start of the interval. + end: 0-based end of the interval (inclusive). + span: Step size for down-sampling the output arrays. Defaults to 1. + + Returns: + A tuple (senseArr, antisenseArr) where each element is a numpy + array of length ``(end - start + 1) / span`` containing per-base + read depth for the respective strand across the interval. 
+ """ #TODO incoporate fragment size into reads (see above), default 200nt intervalSize = end-start+1 senseArr = numpy.zeros(intervalSize) @@ -491,8 +857,33 @@ def bamFetchGappedInterval(bamHandle,chr,start,end,span=1): return senseArr[::span],antisenseArr[::span] def findLargestKmer(bamHandle,chr,start,end,strand,k=21,gapped=False,span=1): - """Fetches read density across an interval and finds the start and end position (start and end offset by an index) - of the kmer with the largest value. Has not been tested yet""" + """Find the k-mer window with the highest total read depth within an interval. + + Computes per-base read depth across the interval (using either the + simple or gapped pileup function) and slides a window of size ``k`` + across the appropriate strand array to locate the window whose summed + depth is largest. + + Note: This function has not been tested yet. + + Args: + bamHandle: An open pysam AlignmentFile object. + chr: Reference sequence name / chromosome to query. + start: 0-based start of the interval. + end: 0-based end of the interval (inclusive). + strand: Which strand array to search; '+' uses the sense array, + '-' uses the antisense array. + k: Window size in bases. Defaults to 21. + gapped: If True, uses ``bamFetchGappedInterval`` (CIGAR-aware + pileup); otherwise uses ``bamFetchInterval``. Defaults to False. + span: Down-sampling step passed to the pileup function. + Defaults to 1. + + Returns: + A tuple (window_start, window_end) giving the genomic coordinates + of the highest-scoring k-mer window. Both values are offset from + ``start`` by the index of the best window. 
+ """ if not gapped: sense,antisense = bamFetchInterval(bamHandle,chr,start,end,span=span) else: @@ -513,6 +904,28 @@ def findLargestKmer(bamHandle,chr,start,end,strand,k=21,gapped=False,span=1): return start+maxPos,end+maxPos def plotInterval(bamFiles,chr,start,end,name="",span=1,pdfName = "",sumStrands=False): + """Plot read depth across a genomic interval for one or more BAM files. + + Uses rpy2 to create a multi-panel line plot, one panel per BAM file. + Forward-strand depth is shown in blue (positive y-axis) and reverse- + strand depth in red (negative y-axis) unless ``sumStrands`` is True, + in which case a single combined black trace is drawn. Optionally saves + the plot to a PDF. + + Args: + bamFiles: A list of paths to BAM files to plot (one panel each). + chr: Reference sequence name / chromosome to display. + start: 0-based start of the display window. + end: 0-based end of the display window (inclusive). + name: Optional label appended to each panel title. Defaults to ''. + span: Down-sampling step passed to the pileup function. + Defaults to 1. + pdfName: If non-empty, the plot is written to this PDF path; otherwise + an interactive R window is opened. Defaults to ''. + sumStrands: If False (default), sense and antisense tracks are + plotted separately with opposite sign. If True, strand depths + are summed into a single positive trace. + """ nplots = len(bamFiles) #Setup plot environment @@ -544,6 +957,19 @@ def plotInterval(bamFiles,chr,start,end,name="",span=1,pdfName = "",sumStrands=F robjects.r['dev.off']() def bamStats(bamFile): + """Compute per-chromosome read counts for a BAM file. + + Iterates over every read in the BAM file (including unmapped reads) and + tallies how many reads map to each reference sequence. + + Args: + bamFile: Path to the BAM file. + + Returns: + A dict with a single key ``'readDist'`` whose value is itself a + dict mapping reference sequence index (``rname``) to the number of + reads mapping to that reference. 
+ """ rtrn ={} #Fetch total reads in Bam by chromosome samfile = pysam.AlignmentFile(bamFile,'rb') @@ -554,7 +980,20 @@ def bamStats(bamFile): return rtrn def getrRNAReads(bamFile,rRNABedFile): - """Takes a bed file of rRNA genes and queries the bam file to determine the number of unique reads that are mapping to rRNA genes in a given sample""" + """Count unique reads that map to rRNA gene loci. + + Parses a BED file of rRNA gene coordinates and queries the BAM file for + each locus, collecting all overlapping read names. Duplicate read names + are collapsed before returning the final count. + + Args: + bamFile: Path to the sorted, indexed BAM file to query. + rRNABedFile: Path to a BED file listing rRNA gene intervals. + + Returns: + The number of unique read names (query names) that overlap at least + one rRNA gene locus. + """ reads = [] bedIter = intervallib.parseBed(rRNABedFile) samfile = pysam.AlignmentFile(bamFile,'rb') @@ -567,6 +1006,15 @@ def getrRNAReads(bamFile,rRNABedFile): return len(uniqify(reads)) def uniqify(seq): + """Return the unique elements of a sequence (order not preserved). + + Args: + seq: Any iterable of hashable elements. + + Returns: + A dict_keys view containing one entry per unique element found in + ``seq``. The original order is not preserved. + """ # Not order preserving keys = {} for e in seq: @@ -574,7 +1022,25 @@ def uniqify(seq): return keys.keys() def collapseMatrix(fname): - """Specifically finds a vector of sums for a chromatin matrix by position""" + """Sum a tab-delimited chromatin matrix column-wise across all samples. + + Reads a matrix file whose first row is a header and whose subsequent + rows each begin with two identifier fields (sample and name) followed + by numeric values. Returns the element-wise sum of all data rows and + the list of row names. + + Args: + fname: Path to a tab-delimited matrix file. 
Expected format: the + first line is a header whose columns (after the leading + identifier columns) name the positions. Each subsequent line + starts with a sample identifier and a row name, followed by + numeric values. + + Returns: + A tuple (names, sums) where ``names`` is a list of row-name strings + (second column of each data row) and ``sums`` is a numpy array of + the column-wise sums across all data rows. + """ handle = open(fname,'r') header = handle.readline().rstrip() header = header.split("\t")[1:] diff --git a/src/seqlib/plotting.py b/src/seqlib/plotting.py index 89196d1..bf5e6b4 100644 --- a/src/seqlib/plotting.py +++ b/src/seqlib/plotting.py @@ -1,20 +1,34 @@ -''' -Created on Jul 13, 2010 +"""Plotting utilities for genomic and epigenomic data visualisation. -@author: lgoff -''' +Provides helper functions for generating publication-quality plots of +chromatin mark occupancy and other aggregate genomic features using R via +Rscript. +""" import os def chromatinAggPlots(basename): - """ - Makes chromatin aggregate plots - - requires: - basename.vec - basename.row - basename.col - + """Generates chromatin aggregate plots as a multi-panel PDF using R. + + Writes an R script that reads three data files produced by an upstream + pipeline step, then calls Rscript to execute it and produce a PDF of + aggregate chromatin mark occupancy profiles centred on smRNA predictions. + + Required input files (all derived from basename): + - basename.vec: Tab-delimited matrix of signal values. + - basename.row: Tab-delimited BED-like annotation of rows. + - basename.col: Tab-delimited column name file. + + Output: + - basename.pdf: Multi-panel PDF with one line plot per chromatin mark. + - basename.q: The R script used to generate the plot (retained). + + Args: + basename: Base path/name shared by all input files and used for the + output PDF and R script. + + Returns: + The return code of the Rscript invocation (0 on success). 
""" myScript = """ colNames<-read.table("%s.col",colClasses="character",header=F,sep="\\t") diff --git a/src/seqlib/primer3lib.py b/src/seqlib/primer3lib.py index 48383f1..1a9d9af 100644 --- a/src/seqlib/primer3lib.py +++ b/src/seqlib/primer3lib.py @@ -1,12 +1,11 @@ -''' -Created on Sep 9, 2010 +"""Primer3 output parsing and primer design helpers. -Handles primer3 running and parsing output +Provides data classes (Record, Primer) for representing primer3 output and +a generator function for parsing primer3 Boulder-IO output files. Also +includes a convenience wrapper for running primer3_core directly from Python. -primer3 >= v2.2 - -@author: lgoff -''' +Requires primer3 >= v2.2. +""" import subprocess import sys @@ -14,20 +13,19 @@ class Record(object): - ''' - Represent information from a primer3 run finding primers. - - Members: - - sequenceID = value of SEQUENCE_ID field from primer3 record - - sequence = value of SEQUENCE_TEMPLATE field - - primers = list of Primer objects describing primer pairs for this target sequence. - - comments = the comment line(s) for the record - - attributes = other global parameters relevant to the record as a whole and not just a primer - ''' + """Represents the primer3 output for a single target sequence. + + Attributes: + sequenceID: Value of the SEQUENCE_ID field from the primer3 record. + sequence: Value of the SEQUENCE_TEMPLATE field. + comments: Comment line(s) associated with the record. + primers: List of Primer objects describing primer pairs designed for + this target sequence. + attributes: Dictionary of other global parameters in the primer3 + record that are not specific to an individual primer pair. 
+ """ def __init__(self): - ''' - Constructor - ''' + """Initialises a Record with empty/default attribute values.""" self.sequenceID = "" self.sequence = "" self.comments = "" @@ -35,19 +33,37 @@ def __init__(self): self.attributes = {} def __iter__(self): + """Iterates over the Primer objects in this record.""" return iter(self.primers) def __repr__(self): + """Returns a short string representation of the record.""" return "%s: %d primer pair(s)" % (self.sequenceID,len(self.primers)) class Primer(object): - ''' - A primer set designed by Primer3 - ''' + """Represents a single primer pair designed by Primer3. + + Attributes: + sequenceID: ID of the target sequence for which this primer was + designed (matches the parent Record's sequenceID). + number: 1-based rank of this primer pair within the record. + size: Deprecated field; use product_size instead. + forward_seq: Sequence of the forward (left) primer. + forward_start: 0-based start position of the forward primer on the + template. + forward_length: Length of the forward primer in bases. + forward_tm: Melting temperature of the forward primer in °C. + forward_gc: GC content of the forward primer as a percentage. + reverse_seq: Sequence of the reverse (right) primer. + reverse_start: 0-based start position of the reverse primer on the + template. + reverse_length: Length of the reverse primer in bases. + reverse_tm: Melting temperature of the reverse primer in °C. + reverse_gc: GC content of the reverse primer as a percentage. + product_size: Expected PCR product size in base pairs. 
+ """ def __init__(self): - ''' - Constructor - ''' + """Initialises a Primer with zero/empty attribute values.""" self.sequenceID="" self.number = 0 self.size = 0 @@ -64,9 +80,30 @@ def __init__(self): self.product_size = 0 def __repr__(self): + """Returns a short string representation showing the sequence ID, number, and primer sequences.""" return "%s_%d\n\tFwd: %s\tRev: %s" % (self.sequenceID,self.number,self.forward_seq, self.reverse_seq) def parse(handle): + """Parses a primer3 Boulder-IO output file and yields Record objects. + + Reads lines from the file handle, accumulates them until a '=' record + separator is encountered, then constructs a Record with its associated + Primer objects and yields it. + + Args: + handle: A readable file-like object containing primer3 output in + Boulder-IO format (each record terminated by a line containing + only '='). + + Yields: + Record objects, one per primer3 sequence entry. Each Record contains + a list of Primer objects corresponding to the primer pairs returned + by primer3 for that sequence. + + Raises: + StopIteration: When the end of the file is reached. + KeyError: If a required primer3 output field is missing from a record. + """ recordLines = [] while True: line = handle.readline().rstrip() @@ -109,7 +146,25 @@ def parse(handle): #Context specific runs ####### def runPrimer3(fastaFile,task="qpcr",p3CloneSetFile="/seq/compbio-hp/lgoff/lincRNAs/primer_design/P3_cloning_primer_settings.p3",p3PCRSetFile="/seq/compbio-hp/lgoff/lincRNAs/primer_design/P3_qPCR_primer_settings.p3"): - """Task can be either 'qpcr' or 'cloning'""" + """Runs primer3_core on a FASTA file to design qPCR or cloning primers. + + Converts the FASTA file to Boulder-IO format and launches a primer3_core + subprocess with the appropriate settings file. The output file path is + returned; note that the subprocess is not waited on before returning. + + Args: + fastaFile: Path to a FASTA file of sequences to design primers for. 
+ task: Either 'qpcr' (default) to design short amplicon primers, or + 'cloning' to design full-length amplification primers using a + defined included region. + p3CloneSetFile: Path to the primer3 settings file used for cloning + primer design. + p3PCRSetFile: Path to the primer3 settings file used for qPCR primer + design. + + Returns: + Path to the primer3 output file (baseName + '.p3out'). + """ baseName = fastaFile.rstrip(".fa") iter = sequencelib.FastaIterator(open(fastaFile,'r')) diff --git a/src/seqlib/prob.py b/src/seqlib/prob.py index 72d808a..e7969a4 100644 --- a/src/seqlib/prob.py +++ b/src/seqlib/prob.py @@ -1,4 +1,11 @@ #!/usr/bin/env python +"""Probability and statistics tools for DNA sequence analysis. + +Provides signal-to-noise ratio, Z-score, binning, cumulative sums, +nucleotide frequency utilities, Gaussian evaluation, moving averages, +Poisson and binomial probability functions, combinatorics, and +dictionary utility functions used throughout seqlib. +""" import math import operator import random @@ -12,21 +19,61 @@ #Probability Tools for DNA sequence analysis ####### def snr(observed,expected): + """Compute the signal-to-noise ratio (SNR) of an observed count vs an expected count. + + Calculates the simple ratio:: + + SNR = observed / expected + + Args: + observed: The observed count or value (numeric). + expected: The expected count or value (numeric, must be non-zero). + + Returns: + The ratio observed / expected as a float. + """ return observed/expected def zscore(observed,expected): + """Compute the Z-score of an observed count under a Poisson null model. + + Assumes the standard deviation equals the square root of the + expected count (Poisson approximation):: + + Z = (observed - expected) / sqrt(expected) + + Args: + observed: The observed count or value (numeric). + expected: The expected count or value (numeric, must be positive). + + Returns: + The Z-score as a float. 
+ """ return (observed-expected)/math.sqrt(expected) def which_bin(bins, x, safe=0): - """ - # if we're interested in binning x with boundaries - # 0, 5, 10, 15 - # then it will return which boundary it belongs in. - # if x<0: -1 - # if 0<=x<5: 0 - # if 5<=x<10: 1 - # if 10<=x<15: 2 - # if x>=15: 3 + """Determine which bin interval a value ``x`` falls into. + + Given sorted bin boundary values, returns the 0-based index of the + interval that contains ``x``. For example, with boundaries + ``[0, 5, 10, 15]``:: + + x < 0 -> -1 + 0 <= x < 5 -> 0 + 5 <= x < 10 -> 1 + 10 <= x < 15-> 2 + x >= 15 -> 3 (or len(bins) when safe=0) + + Args: + bins: A sorted list of numeric bin boundary values. + x: The value to bin. + safe: If ``1`` and ``x`` exactly equals ``bins[-1]``, returns + ``len(bins)`` instead of the usual out-of-range value. + Defaults to 0. + + Returns: + An integer bin index. Returns ``-1`` if ``x < bins[0]``, or + ``len(bins)`` if ``x >= bins[-1]`` (unless ``safe=1`` applies). """ if x gaussianN(2,3) evaluated at 4 + """Create a Gaussian PDF function with fixed mean and standard deviation. + + Returns a callable that evaluates the Gaussian PDF at any point + ``x``, with ``mu`` and ``sigma`` captured by closure. + + Example:: + + N2_3 = make_gaussian(2, 3) + N2_3(4) # -> gaussian(4, mu=2, sigma=3) + + Args: + mu: The mean of the Gaussian. + sigma: The standard deviation of the Gaussian. + + Returns: + A function ``f(x)`` that evaluates the Gaussian N(mu, sigma) at + ``x``. """ return lambda x,mu=mu,sigma=sigma: ( (1.0/math.sqrt(2*math.pi*sigma)) * (math.e**(-((x-mu)**2)/(2*sigma**2)))) def make_adder(n): - """ - usage: - Add2=make_adder(2) - Add2(3) -> 5 + """Create an adder function that adds a fixed value ``n`` to its argument. + + Returns a callable that adds ``n`` (captured by closure) to any + input ``x``. + + Example:: + + Add2 = make_adder(2) + Add2(3) # -> 5 + + Args: + n: The fixed value to add. + + Returns: + A function ``f(x)`` that returns ``x + n``. 
""" return lambda x,n=n: x+n @@ -107,6 +257,17 @@ def make_adder(n): loge_2 = math.log(2) def avg(l,precise=0): + """Compute the arithmetic mean of a list of numbers. + + Args: + l: A list of numeric values. + precise: If non-zero, divide by ``float(len(l))`` for a + floating-point result. If 0 (default), divide by + ``len(l)`` using integer or floor division. + + Returns: + The mean of ``l`` as a number, or 0 if ``l`` is empty. + """ if not l: return 0 if precise: return reduce(operator.add,l,0)/float(len(l)) @@ -114,28 +275,78 @@ def avg(l,precise=0): return reduce(operator.add,l,0)/len(l) def movavg(s, n): - ''' returns an n period moving average for the time series s + """Compute an n-period moving average for a time series. + + Uses cumulative sums for an O(len(s)) implementation:: - s is a list ordered from oldest (index 0) to most recent (index -1) - n is an integer + MA[i] = mean(s[i-n+1 : i+1]) - returns a numeric array of the moving average - ''' + The result has length ``len(s) - n + 1``. + + Args: + s: A list or array of numeric values ordered from oldest + (index 0) to most recent (index -1). + n: The window size (number of periods) for the moving average. + + Returns: + A NumPy array of the moving average values. The array has + ``len(s) - n + 1`` elements. + """ s = np.array(s) c = np.cumsum(s) return (c[n-1:] - c[:-n+1]) / float(n) def median(l): + """Compute the median of a list of numbers. + + Sorts ``l`` and returns the middle value for odd-length lists or the + average of the two middle values for even-length lists. + + Args: + l: A list of numeric values. + + Returns: + The median value, or ``None`` if ``l`` is empty. + """ if not l: return None l = sorted(l) if len(l)%2: return sorted(l)[len(l)//2] else: return (l[len(l)//2]+l[len(l)//2-1])/2.0 def stdev(l, failfast=1): + """Compute the sample standard deviation of a list of numbers. + + Returns the square root of the sample variance computed by + :func:`variance`. 
+ + Args: + l: A list of numeric values with at least 2 elements. + failfast: Passed directly to :func:`variance`. If non-zero + (default), raises an error when fewer than 2 samples are + provided. + + Returns: + The sample standard deviation as a float. + """ return math.sqrt(variance(l,failfast=failfast)) def variance(l,failfast=1): + """Compute the sample variance of a list of numbers. + + Uses Bessel's correction (divides by ``n - 1``):: + + s^2 = sum((x - mean)^2) / (n - 1) + + Args: + l: A list of numeric values. + failfast: If non-zero (default), raises a string exception when + fewer than 2 samples are provided. If 0, returns 0 instead. + + Returns: + The sample variance as a float, or 0 when ``failfast=0`` and + the list has fewer than 2 elements. + """ if (not l) or len(l)==1: if failfast: raise "tools.variance: Not enough samples. Need >= 2, got %s"%len(l) else: return 0#'N/A' @@ -146,14 +357,51 @@ def variance(l,failfast=1): return s / (len(l)-1) def log2(x): + """Compute the base-2 logarithm of ``x``. + + Uses the change-of-base formula:: + + log2(x) = ln(x) / ln(2) + + Args: + x: A positive real number. + + Returns: + The base-2 logarithm of ``x`` as a float. + """ #converting bases: log_a(b) = log_c(b)/log_c(a) #i.e. log_2(x) = log_e(2)/log_e(x) = log_10(2)/log_10(x) return math.log(x)/float(loge_2) def log_k(x,k): + """Compute the base-``k`` logarithm of ``x``. + + Uses the change-of-base formula:: + + log_k(x) = ln(x) / ln(k) + + Args: + x: A positive real number. + k: The base of the logarithm (positive real number != 1). + + Returns: + The base-``k`` logarithm of ``x`` as a float. + """ return math.log(x)/math.log(k) def prob2score(prob): + """Convert a probability to a Phred-like quality score. + + Computes ``-10 * log10(prob)``, so a probability of 1/100 maps to + a score of 20 (the standard Phred-score convention). + + Args: + prob: A probability value (float in (0, 1]). + + Returns: + A float quality score equal to ``-10 * log10(prob)``. 
Returns + -1 if any exception is raised (e.g. ``prob=0``). + """ #1/100 -> 20 try: return -10*float(math.log10(float(prob))) @@ -161,10 +409,32 @@ def prob2score(prob): return -1 def p2bits(p): - """Takes p-value and returns negative log2""" + """Convert a p-value to bits of evidence (negative log base-2). + + Computes ``-log2(p)``, which quantifies the evidence against the + null hypothesis in bits. + + Args: + p: A p-value (float in (0, 1]). + + Returns: + A float equal to ``-log2(p)``. Higher values indicate stronger + evidence against the null. + """ return -log2(p) def factorial(n): + """Compute n! (n factorial) iteratively. + + Multiplies all integers from ``n`` down to 1. + + Args: + n: A non-negative integer. + + Returns: + An integer equal to ``n * (n-1) * ... * 2 * 1``. Returns 1 + when ``n`` is 0 or 1. + """ result = 1 for i in range(n,0,-1): #print i @@ -175,18 +445,63 @@ def factorial(n): #Poisson ########### def poisson_expected(rate): + """Print a table of Poisson probabilities for counts 1 to 49. + + For each integer ``x`` from 1 to 49, prints the Poisson probability + ``P(X = x; rate)`` and the expected count in a population of 12 million:: + + x P(X=x) 12000000 * P(X=x) + + Args: + rate: The Poisson rate parameter (expected number of events). + """ for x in range(1,50,1): p = poisson(rate,x) print(f"{x}\t{p}\t{12000000*p}") def poisson(rate, x): - """Returns the probability of observing a count of x""" + """Compute the Poisson probability of observing exactly ``x`` events. + + Evaluates the Poisson PMF:: + + P(X = x; rate) = exp(-rate) * rate^x / x! + + Args: + rate: The expected number of events (lambda, must be non-negative). + x: The observed count (non-negative integer). + + Returns: + The probability P(X = x) as a float. + """ return math.exp(-rate)*(rate**x)/factorial(x) ###################### #Binomial Distribution ####################### def binomial_likelihood_ratio(ps,k,n): + """Compute the likelihood ratio of two binomial hypotheses. 
+ + Given two probability parameters ``ps[0]`` (null hypothesis H0) and + ``ps[1]`` (alternative hypothesis H1), computes:: + + LR = log(P(k | p=ps[1], n)) / P(k | p=ps[0], n) + + Note: + The formula mixes log and linear likelihoods and is not the + standard log-likelihood ratio test; see :func:`binomial_log_likelihood_ratio` + for the standard implementation. + + Args: + ps: A 2-element list ``[p0, p1]`` where ``p0`` is the null + probability and ``p1`` is the alternative probability. + k: The observed number of successes. + n: The total number of trials. + + Returns: + A float representing the likelihood ratio. Returns + ``sys.maxsize`` with a warning message if the null hypothesis + likelihood is 0. + """ # p[0] is the null hypothesis # p[1] is the hypothesis being tested assert(len(ps)==2) @@ -202,20 +517,83 @@ def binomial_likelihood_ratio(ps,k,n): return sys.maxsize def binomial_log_likelihood_ratio(ps,k,n): + """Compute the log-likelihood ratio of two binomial hypotheses. + + Calculates:: + + LLR = log P(k | p=ps[1], n) - log P(k | p=ps[0], n) + + where each log probability is computed by :func:`log_binomial`. + A positive LLR supports the alternative hypothesis ``ps[1]`` over + the null ``ps[0]``. + + Args: + ps: A 2-element list ``[p0, p1]`` where ``p0`` is the null + success probability and ``p1`` is the alternative. + k: The observed number of successes. + n: The total number of trials. + + Returns: + The log-likelihood ratio as a float. + """ return log_binomial(ps[1],k,n) - log_binomial(ps[0],k,n) def log_binomial(p,k,n): + """Compute the log probability of the binomial PMF. + + Returns the natural log of P(X = k) for X ~ Binomial(n, p):: + + log P(k; n, p) = log C(n, k) + k*log(p) + (n-k)*log(1-p) + + Args: + p: The probability of success per trial (float in (0, 1)). + k: The number of successes (non-negative integer). + n: The number of trials (integer >= k). + + Returns: + The natural log of the binomial PMF as a float. 
+ """ # the log probability of seeing exactly k successes in n trials # given the probability of success is p return log_n_choose_k(n,k)+math.log(p)*k+math.log(1-p)*(n-k) def binomial(p,k,n): + """Compute the binomial probability P(X = k; n, p). + + Calculates the probability of observing exactly ``k`` successes in + ``n`` independent Bernoulli trials each with success probability + ``p``:: + + P(X = k) = C(n, k) * p^k * (1-p)^(n-k) + + Args: + p: The probability of success per trial (float in [0, 1]). + k: The number of successes (non-negative integer). + n: The number of trials (integer >= k). + + Returns: + The binomial probability as a float. + """ # probability of seeing exactly k successes in n trials, given # the probability of success is p #return n_choose_k(n,k)*(p**k)*((1-p)**(n-k)) return n_choose_k(n,k)*(p**k)*((1-p)**(n-k)) def cumBinomial(p,k,n): + """Compute the cumulative binomial probability P(X <= k; n, p). + + Sums the binomial PMF from 0 to ``k`` inclusive:: + + P(X <= k) = sum_{j=0}^{k} C(n, j) * p^j * (1-p)^(n-j) + + Args: + p: The probability of success per trial (float in [0, 1]). + k: The upper bound on the number of successes (non-negative int). + n: The number of trials (integer >= k). + + Returns: + The cumulative binomial probability P(X <= k) as a float. + """ #Returns the cumulative probability from the binomaial distribution Pval = 0.0 for j in range(0,k+1): @@ -223,6 +601,25 @@ def cumBinomial(p,k,n): return Pval def n_choose_k(n,k): + """Compute the binomial coefficient C(n, k) = n! / (k! * (n-k)!). + + Uses the multiplicative recurrence:: + + C(n, k) = (n * (n-1) * ... * (n-k+1)) / (k * (k-1) * ... * 1) + + Exploits the symmetry ``C(n, k) = C(n, n-k)`` to choose the smaller + of ``k`` and ``n-k`` for efficiency. + + Args: + n: Total number of items (non-negative integer). + k: Number of items to choose (non-negative integer, ``k <= n``). + + Returns: + The binomial coefficient C(n, k) as a float. 
+ + Raises: + AssertionError: If ``k > n``. + """ # (n k) = n! / (k! (n-k)!) # # n*(n-1)*(n-2)*....*(n-k+1) @@ -244,6 +641,25 @@ def n_choose_k(n,k): return result def log_n_choose_k(n,k): + """Compute log(C(n, k)) in log space to avoid integer overflow. + + Evaluates the natural logarithm of the binomial coefficient using + the additive log form of the multiplicative recurrence:: + + log C(n, k) = sum(log(n-i+1) - log(i) for i in 1..k') + + where ``k' = min(k, n-k)``. + + Args: + n: Total number of items (non-negative integer). + k: Number of items to choose (non-negative integer, ``k <= n``). + + Returns: + The natural log of C(n, k) as a float. + + Raises: + AssertionError: If ``k > n``. + """ # (n k) = n! / (k! (n-k)!) # # n*(n-1)*(n-2)*....*(n-k+1) @@ -263,6 +679,25 @@ def log_n_choose_k(n,k): #Dictionary Tools ################# def cget(diclist, key, strict=1): + """Extract the same key from every item in a list of dicts (or sequences). + + Also known as "cross-get" or "gather". Iterates over ``diclist`` + and collects ``item[key]`` for each element. + + Args: + diclist: A list of dictionaries or index-accessible objects that + all share the specified ``key``. + key: The key (or integer index) to look up in each element. + strict: If non-zero (default), every element must contain + ``key``; raises ``KeyError`` or ``IndexError`` otherwise. + If 0, silently skips elements that are falsy or do not + contain ``key`` (using ``generic_has_key``). + + Returns: + A list of values ``item[key]`` for each item in ``diclist``. + When ``strict=1`` the returned list has the same length as + ``diclist``. When ``strict=0`` the length may be shorter. 
+ """ # cross_get was: gather(diclist,key) # gathers the same key from a list of dictionaries # can also be used in lists diff --git a/src/seqlib/pygrlib.py b/src/seqlib/pygrlib.py index 9f5b1e7..f980c61 100644 --- a/src/seqlib/pygrlib.py +++ b/src/seqlib/pygrlib.py @@ -18,7 +18,27 @@ ###Classes class MySliceInfo(object): + """Stores coordinate information for a genomic slice in pygr convention. + + Holds the four fields required to identify a sequence slice: sequence ID, + start, stop (exclusive), and orientation (+1 or -1). + + Attributes: + id: Sequence (chromosome) identifier. + start: 0-based start coordinate. + stop: Exclusive end coordinate. + orientation: Strand orientation; +1 for forward, -1 for reverse. + """ def __init__(self, seq_id, start, stop, orientation): + """Initialises a MySliceInfo. + + Args: + seq_id: Sequence (chromosome) identifier. + start: 0-based start coordinate of the slice. + stop: Exclusive end coordinate of the slice. + orientation: Strand orientation; +1 for forward strand, -1 for + reverse strand (pygr convention). + """ (self.id, self.start, self.stop, self.orientation) = \ (seq_id, start, stop, orientation) @@ -26,7 +46,30 @@ def __init__(self, seq_id, start, stop, orientation): ###GFF Futzing around class GFF3Row(object): + """Represents a single data row from a GFF3 annotation file. + + Parses one GFF3 line and stores the type, sequence ID, start/stop + coordinates (converted to 0-based pygr convention), strand orientation, + and all key=value attributes from column 9. + + Attributes: + type: Feature type string from column 3 (e.g. 'gene', 'exon'). + id: Sequence (chromosome) ID from column 1. + start: 0-based start coordinate (GFF3 1-based column 4 minus 1). + stop: Exclusive end coordinate (GFF3 column 5). + orientation: +1 for '+' strand, -1 for '-' strand. + Additional attributes are set dynamically from column 9 key=value + pairs; multi-value attributes (comma-separated) are stored as lists. 
+ """ def __init__(self, line): + """Parses a GFF3 line into a GFF3Row object. + + Args: + line: A single tab-delimited GFF3 data line (not a comment). + + Raises: + ValueError: If the strand character in column 7 is not '+' or '-'. + """ cols = line.split('\t') self.type = cols[2] self.id = cols[0] # sequence ID @@ -47,6 +90,25 @@ def __init__(self, line): def read_gff3(filename, genome): + """Reads a GFF3 annotation file and builds pygr AnnotationDB objects. + + Parses a GFF3 file, groups features by type, and creates one pygr + AnnotationDB per feature type, each associated with the provided genome + sequence database. Comment lines (starting with '#') are skipped. + Features lacking a type or gene_id attribute are also skipped. + + Args: + filename: Path to a GFF3-format annotation file. + genome: A pygr sequence database object (e.g. a worldbase genome) + used to associate annotation slices with genomic sequence. + + Returns: + A dictionary mapping feature type strings to pygr AnnotationDB + objects. + + Raises: + ImportError: If the pygr library is not installed. + """ if not _PYGR_AVAILABLE: raise ImportError("pygr is required for read_gff3 but is not installed.") d = {} # for different types of sliceDBs diff --git a/src/seqlib/seqData.py b/src/seqlib/seqData.py index 23f970b..5258693 100644 --- a/src/seqlib/seqData.py +++ b/src/seqlib/seqData.py @@ -1,9 +1,17 @@ #!/usr/bin/env python -''' -Created on Oct 27, 2009 +"""Data structures and utilities for working with BAM/SAM sequencing data. -@author: lgoff -''' +Provides SamData and ChromData classes wrapping pysam for read access to BAM +files, a plotRegions function for strand-aware coverage visualisation via rpy, +and helper utilities for parsing SAM bitflags and converting reads to Interval +objects. + +Note: This module depends on pysam and rpy, which must be installed separately. + +Originally created on Oct 27, 2009. 
+ +Author: lgoff +""" import intervallib import pysam @@ -11,7 +19,26 @@ class SamData: + """Wrapper around a pysam BAM file handle. + + Provides basic access to a sorted, indexed BAM file including pileup + queries and a pysam Samfile handle. + + Attributes: + name: Sample name string. + file: Path to the BAM file. + description: Human-readable description string. + type: Data type label (default "basic"). + handle: Open pysam.Samfile handle. + """ def __init__(self,name,file,description): + """Initialize and open a SamData object. + + Args: + name: Sample name string. + file: Path to the BAM file. + description: Human-readable description of the sample. + """ self.name = name self.file = file self.description = description @@ -19,22 +46,37 @@ def __init__(self,name,file,description): self.open() def __str__(self): + """Return the sample name string.""" return self.name def open(self): - """Returns a pysam handle to the .BAM file""" + """Open the BAM file and store the pysam handle in self.handle.""" self.handle = pysam.Samfile(self.file,'rb') def close(self): + """Close the pysam BAM file handle.""" self.handle.close() def samSort(self): + """Placeholder for BAM sorting (not yet implemented).""" pass def samIndex(self): + """Placeholder for BAM indexing (not yet implemented).""" pass def pileupQuery(self,chr,start='',end=''): + """Return per-position pileup depths for a genomic region. + + Args: + chr: Chromosome name string. + start: Start coordinate (default "" for beginning of chromosome). + end: End coordinate (default "" for end of chromosome). + + Returns: + A tuple (pos, n) where pos is a list of genomic positions and + n is a list of corresponding pileup depths. + """ pos = [] n = [] for pileupcolumn in self.handle.pileup(chr,start,end): @@ -43,7 +85,25 @@ def pileupQuery(self,chr,start='',end=''): return (pos,n) class ChromData(SamData): + """SamData subclass for chromatin modification ChIP-seq BAM files. 
+ + Extends SamData with mark and cell-line metadata. + + Attributes: + mark: Histone mark or chromatin feature name (e.g. "H3K4me3"). + cellLine: Cell line identifier string. + type: Data type label (always "chromatin"). + """ def __init__(self,name,file,description,mark,cellLine): + """Initialize a ChromData object. + + Args: + name: Sample name string. + file: Path to the BAM file. + description: Human-readable description. + mark: Histone mark or antibody target name. + cellLine: Cell line identifier string. + """ SamData.__init__(self, name=name, file=file, description=description) self.mark = mark self.cellLine = cellLine @@ -74,6 +134,18 @@ def __init__(self,name,file,description,mark,cellLine): } def openBams(dataDict,cellLine): + """Open a collection of BAM files described by a dictionary. + + Creates ChromData objects for each entry in dataDict, opens each BAM + file handle, and returns the list. + + Args: + dataDict: Dict mapping mark name to BAM file path. + cellLine: Cell line identifier assigned to all ChromData objects. + + Returns: + List of opened ChromData objects. + """ files = [] for k,v in dataDict.items(): sample = v.split("_")[0] @@ -99,7 +171,18 @@ def plotRegions(bamHandle,chrom,start,end): """ def plotRegions(bamHandle,chrom,start,end): - """Incorporates strandedness and possibly an extension factor to account for fragment size""" + """Plot strand-aware read coverage for a genomic region using rpy. + + Counts per-position forward ("+") and reverse ("-") read coverage using + pysam fetch, then draws a coverage plot via rpy with forward reads in blue + above the axis and reverse reads in red below. + + Args: + bamHandle: An open pysam Samfile or AlignmentFile handle. + chrom: Chromosome name string. + start: Start coordinate (integer). + end: End coordinate (integer). 
+ """ tmp = {} tmp["+"] = {} tmp["-"] = {} @@ -119,8 +202,18 @@ def plotRegions(bamHandle,chrom,start,end): def plotChromProfile(bamFiles,chrom,start,end): - """Not terribly flexible at this point, but will plot 'tracks' from a given chrom,start,end - position from a list of opened .BAM files""" + """Plot stacked pileup-depth tracks for multiple BAM files via rpy. + + Opens a new rpy graphics device and plots one coverage track per BAM + file in a vertically stacked layout. Not very flexible at this point. + + Args: + bamFiles: List of opened SamData (or similar) objects with a + .handle attribute supporting pileup() and a .name attribute. + chrom: Chromosome name string. + start: Start coordinate (integer). + end: End coordinate (integer). + """ r.x11(width=6,height=10) r.par(mfrow=[len(bamFiles),1]) @@ -136,10 +229,17 @@ def plotChromProfile(bamFiles,chrom,start,end): #Functions for sam Reads ############### def getBitValue(n, p): - ''' - get the bitvalue of denary (base 10) number n at the equivalent binary - position p (binary count starts at position 0 from the right) - ''' + """Return the bit value of integer n at binary position p. + + Binary position 0 is the least significant bit (rightmost). + + Args: + n: Denary (base-10) integer. + p: Bit position to inspect (0-indexed from the right). + + Returns: + 0 or 1 depending on the bit at position p. + """ return (n >> p) & 1 def strandFlag(flag): @@ -153,10 +253,33 @@ def strandFlag(flag): return "*" def samRead2Interval(samRead): + """Convert a single pysam AlignedRead to an intervallib.Interval. + + The strand is determined from the SAM bitflag. Coordinates are converted + to 1-based by adding 1 to samRead.pos. + + Args: + samRead: A pysam AlignedRead object. + + Returns: + An intervallib.Interval with chr set to samRead.qname, 1-based + start/end coordinates, and strand derived from the bitflag. 
+ """ strand = strandFlag(int(samRead.flag)) return intervallib.Interval(samRead.qname,int(samRead.pos)+1,int(samRead.pos)+samRead.rlen+1,strand) def samReads2Intervals(samReads,start='start',end='end',score='readcount',sampleName=".",offset=0): - """samReads is an iterator object over a set of sam reads using the pysam 'fetch' call""" + """Convert a pysam fetch iterator of SAM reads to Interval objects. + + Note: This function is not yet implemented (passes without action). + + Args: + samReads: Iterator object over SAM reads from a pysam 'fetch' call. + start: Name of the start coordinate field (default "start"). + end: Name of the end coordinate field (default "end"). + score: Name of the score field (default "readcount"). + sampleName: Sample name string (default "."). + offset: Integer offset applied to coordinates (default 0). + """ pass diff --git a/src/seqlib/seqlib.py b/src/seqlib/seqlib.py index adaf53c..43d0e32 100644 --- a/src/seqlib/seqlib.py +++ b/src/seqlib/seqlib.py @@ -1,3 +1,13 @@ +"""Sequence data structures and molecular biology utilities. + +Provides SeqDict, a dictionary subclass for ordered molecular sequences, and +a variety of constants and functions for DNA/RNA/protein operations including +codon translation, reverse complementation, GC content calculation, and +Kimura sequence evolution simulation. + +Author: lgoff (derived from rasmus seqlib) +""" + import copy import math import random @@ -6,20 +16,30 @@ class SeqDict (dict): - """\ - A dictionary for molecular sequences. Also keeps track of their order, - useful for reading and writing sequences from fasta's. See fasta.FastaDict - for subclass that implements FASTA reading and writing. + """A dictionary for molecular sequences that also tracks insertion order. + + Useful for reading and writing sequences from FASTA files where order + matters. Keys are sequence names; values are sequence strings. See + fasta.FastaDict for a subclass that implements FASTA reading and writing. 
+ + Attributes: + names: List of sequence names in insertion order. """ def __init__(self): + """Initialize an empty SeqDict.""" dict.__init__(self) self.names = [] def orderNames(self, aln): - """Orders the names in the same order they appear in aln""" + """Reorder self.names to match the key order of another dict. + + Args: + aln: A dict (typically another SeqDict or alignment) whose key + order is used to sort self.names. + """ # Inlined util.list2lookup: creates a dict mapping list items to their index lookup = {v: i for i, v in enumerate(aln.keys())} @@ -28,6 +48,18 @@ def orderNames(self, aln): # add a key, value pair def add(self, key, value, errors=False): + """Add a key-value pair, keeping the longest value on duplicate keys. + + If the key already exists and the new value is at least as long as the + stored value, the stored value is replaced. The insertion order in + self.names is preserved (duplicate keys do not add to names). + + Args: + key: Sequence name string. + value: Sequence string. + errors: If True, write a warning to stderr on duplicate keys + (default False). + """ if key in self: if errors: # Inlined util.logger: write to stderr @@ -43,7 +75,17 @@ def add(self, key, value, errors=False): def get(self, keys, new=None): - """Return a subset of the sequences""" + """Return a new SeqDict containing only the given keys. + + Args: + keys: Iterable of key names to include. + new: Optional pre-existing SeqDict to populate. If None, a new + instance of the same type is created. + + Returns: + A SeqDict (or instance of the same subclass) containing the + requested keys that are present in self. 
+ """ if new == None: new = type(self)() @@ -66,57 +108,103 @@ def alignlen(self): # The following methods keep names in sync with dictionary keys def __setitem__(self, key, value): + """Set a key-value pair and add key to self.names if new.""" if key not in self: self.names.append(key) dict.__setitem__(self, key, value) def __delitem__(self, key): + """Delete a key and remove it from self.names.""" self.names.remove(key) def update(self, dct): + """Update from another dict, appending new keys to self.names. + + Args: + dct: Dict-like object whose items will be merged into self. + """ for key in dct: if key not in self.names: self.names.append(key) dict.update(self, dct) def setdefault(self, key, value): + """Set key to value only if key is absent, tracking order. + + Args: + key: Key to look up or set. + value: Default value to assign if key is missing. + """ if key not in self.names: self.names.append(key) dict.setdefault(self, key, value) def clear(self): + """Remove all items and reset self.names to an empty list.""" self.names = [] dict.clear(self) # keys are always sorted in order added def keys(self): + """Return keys in insertion order. + + Returns: + List of key names in insertion order. + """ return list(self.names) def iterkeys(self): + """Iterate over keys in insertion order. + + Returns: + Iterator over key name strings. + """ return iter(self.names) def values(self): + """Return values in key insertion order. + + Returns: + List of sequence strings in the same order as self.names. + """ return [self[key] for key in self.iterkeys()] def itervalues(self): + """Iterate over values in key insertion order. + + Returns: + Generator yielding sequence strings in insertion order. + """ def func(): for key in self.iterkeys(): yield self[key] return func() def iteritems(self): + """Iterate over (key, value) pairs in key insertion order. + + Returns: + Generator yielding (name, sequence) tuples. 
+ """ def func(): for key in self.iterkeys(): yield (key, self[key]) return func() def items(self): + """Return list of (key, value) pairs in insertion order. + + Returns: + List of (name, sequence) tuples. + """ return list(self.iteritems()) def __iter__(self): + """Iterate over keys in insertion order.""" return iter(self.names) def __len__(self): + """Return the number of sequences stored.""" return len(self.names) @@ -210,6 +298,17 @@ def __len__(self): # hydrophobic / hydrophilic def hydrophobic(aa): + """Return a numeric hydrophobicity score for a single amino acid. + + Args: + aa: Single-letter amino-acid code string. + + Returns: + 2.0 for strongly hydrophobic residues (VILMFWC), + 1.0 for weakly hydrophobic residues (AYHTSPG), + 0.5 for weakly hydrophilic residues (RK), + 0.0 for all other residues. + """ if aa in 'VILMFWC': return 2.0 if aa in 'AYHTSPG': return 1.0 if aa in 'RK': return 0.5 @@ -309,7 +408,24 @@ def hydrophobic(aa): # class TranslateError (Exception): + """Exception raised when a codon cannot be translated correctly. + + Attributes: + aa: The amino-acid sequence string being reverse-translated. + dna: The original DNA sequence string. + a: The amino-acid character that triggered the error. + codon: The DNA codon that did not match. + """ def __init__(self, msg, aa, dna, a, codon): + """Initialize a TranslateError. + + Args: + msg: Human-readable error message. + aa: Amino-acid sequence being processed. + dna: Original DNA sequence. + a: The amino-acid character at the point of failure. + codon: The DNA codon at the point of failure. + """ Exception.__init__(self, msg) self.aa = aa self.dna = dna @@ -319,7 +435,22 @@ def __init__(self, msg, aa, dna, a, codon): def translate(dna, table=CODON_TABLE): - """Translates DNA (with gaps) into amino-acids""" + """Translate a DNA sequence (with gaps) into an amino-acid sequence. + + Codons containing "N" are translated to "X" (unknown amino acid). + Gap codons "---" are translated to "-". 
+ + Args: + dna: DNA string whose length must be a multiple of 3. + table: Codon-to-amino-acid lookup dict (default CODON_TABLE). + + Returns: + Amino-acid sequence string. + + Raises: + AssertionError: If len(dna) is not a multiple of 3. + KeyError: If a codon is not present in the codon table. + """ aa = [] @@ -335,9 +466,24 @@ def translate(dna, table=CODON_TABLE): def revtranslate(aa, dna, check=False): - """Reverse translates aminoacids (with gaps) into DNA + """Reverse-translate an amino-acid sequence (with gaps) back into DNA. + + The original ungapped DNA sequence must be supplied so that the correct + codons are restored. Gap characters "-" in aa are expanded to "---" in the + output. + + Args: + aa: Amino-acid string (may contain "-" gap characters). + dna: Original ungapped DNA string used to recover codons. + check: If True, verify that each codon translates back to the + expected amino acid (default False). - Must supply original ungapped DNA. + Returns: + DNA string with codons matching the amino-acid sequence. + + Raises: + TranslateError: If check=True and a codon does not translate to the + expected amino acid. """ seq = [] @@ -361,7 +507,17 @@ def revtranslate(aa, dna, check=False): "b":"v", "v":"b", "d":"h", "h":"d"} def revcomp(seq): - """Reverse complement a sequence""" + """Return the reverse complement of a DNA sequence. + + Handles IUPAC ambiguity codes as well as standard A/C/G/T bases (both + upper and lower case). + + Args: + seq: DNA sequence string. + + Returns: + Reverse-complemented DNA sequence string. + """ seq2 = [] for i in range(len(seq)-1, -1, -1): @@ -370,6 +526,14 @@ def revcomp(seq): def gcContent(seq): + """Compute the GC content fraction of a DNA sequence. + + Args: + seq: DNA sequence string containing A, C, G, and T characters. + + Returns: + GC fraction as a float in [0.0, 1.0]. 
+ """ # Inlined util.histDict: build a frequency dict of characters hist = {} for c in seq: @@ -392,6 +556,23 @@ def gcContent(seq): def evolveKimuraSeq(seq, time, alpha=1, beta=1): + """Evolve a DNA sequence under the Kimura two-parameter model. + + Each base is independently substituted according to transition (alpha) + and transversion (beta) rate parameters over the given evolutionary time. + + Args: + seq: DNA sequence string (uppercase A/C/G/T only). + time: Evolutionary branch length (substitutions per site). + alpha: Transition rate parameter (default 1). + beta: Transversion rate parameter (default 1). + + Returns: + Evolved DNA sequence string of the same length as seq. + + Raises: + AssertionError: If substitution probabilities do not sum to one. + """ probs = { 's': .25 * (1 - math.e**(-4 * beta * time)), 'u': .25 * (1 + math.e**(-4 * beta * time) @@ -418,6 +599,20 @@ def evolveKimuraSeq(seq, time, alpha=1, beta=1): def evolveKimuraBase(base, time, alpha, beta): + """Evolve a single DNA base under the Kimura two-parameter model. + + Args: + base: A single DNA base character (A/C/G/T). + time: Evolutionary branch length. + alpha: Transition rate parameter. + beta: Transversion rate parameter. + + Returns: + The (possibly substituted) DNA base character. + + Raises: + AssertionError: If substitution probabilities do not sum to one. + """ probs = { 's': .25 * (1 - math.e**(-4 * beta * time)), 'u': .25 * (1 + math.e**(-4 * beta * time) diff --git a/src/seqlib/seqstats.py b/src/seqlib/seqstats.py index c587157..77f7ccc 100644 --- a/src/seqlib/seqstats.py +++ b/src/seqlib/seqstats.py @@ -1,4 +1,18 @@ #!/usr/bin/env python +"""Statistical utilities for peak enrichment analysis in RNA immunoprecipitation and ChIP-Seq experiments. + +Implements a PeakSeq-like approach for comparing experimental (RIP or ChIP) +BAM files against input/IgG control BAM files. The pipeline: + +1. 
Segments the genome into fixed-size bins and counts reads in each bin for + both the experimental and control samples. +2. Determines a global normalisation factor (alpha) via linear regression on + bins that have reads in both samples. +3. Tests each interval in a BED file using a binomial model (reads from the + experimental sample vs. alpha-scaled control reads) to assign p-values. +4. Corrects for multiple testing using Benjamini-Hochberg FDR correction. +5. Outputs results to stdout sorted or filtered by q-value. +""" import getopt import math import sys @@ -12,12 +26,16 @@ #from rpy2 import robjects #from seqtools.genome import chr_lengths,genome_length -"""Collection of utilities for determining peak enrichment in xxx-Seq experiments""" - ################# #Main ################# def main(): + """Legacy command-line entry point — reads three positional arguments and runs smRNApeakSeq. + + Expects sys.argv to contain: expBam ctlBam bedFile. Calls smRNApeakSeq + with filter=False and the module-level useStrand variable. Prefer + newMain() for proper option parsing. + """ expBam = sys.argv[1] ctlBam = sys.argv[2] bedFile = sys.argv[3] @@ -29,6 +47,27 @@ def main(): #Wrappers ######################## def smRNApeakSeq(expBam,ctlBam,bedFile,cutoff = 0.0001,filter=True,useStrand=True): + """Runs the full smRNA/RIP-Seq peak-calling pipeline and writes results to stdout. + + Segments the genome, computes a normalisation factor between experimental + and control BAM files, tests each BED interval with a binomial model, + applies Benjamini-Hochberg FDR correction, and prints tab-delimited + output. + + Args: + expBam: Path to a sorted, indexed BAM file from the experimental + (RIP/ChIP) sample. + ctlBam: Path to a sorted, indexed BAM file from the control (IgG or + input) sample. + bedFile: Path to a BED file of candidate intervals to test. + cutoff: Q-value threshold below which results are printed when filter + is True (default: 0.0001). 
+ filter: If True, only print intervals with q-value <= cutoff. If + False, print all intervals (default: True). + useStrand: If True, count only reads on the same strand as each + interval. If False, count all reads regardless of strand + (default: True). + """ #open files expHandle = pysam.Samfile(expBam,'rb') ctlHandle = pysam.Samfile(ctlBam,'rb') @@ -151,8 +190,24 @@ def cumBinom(nExp,adjCtl,P=0.5): return 1-scipy.stats.binom.cdf(nExp-1,nExp+adjCtl,P) def testInterval(interval,expHandle,ctlHandle,alpha): - """ - #TODO:Make sure that this is only grabbing the appropriate strand and not both....this can be dangerous + """Tests a single genomic interval for strand-aware read enrichment. + + Counts reads on the same strand as the interval from both the experimental + and control BAM files, scales the control count by alpha, and returns + a binomial p-value. + + Args: + interval: An intervallib.Interval object with chr, start, end, and + strand attributes. + expHandle: A pysam AlignmentFile for the experimental sample. + ctlHandle: A pysam AlignmentFile for the control sample. + alpha: Normalisation factor (slope from getAlpha) used to scale + control counts to match the experimental library size. + + Returns: + A tuple (pVal, nExp, adjCtl) where pVal is the binomial p-value, + nExp is the raw experimental read count, and adjCtl is the + alpha-scaled control read count. """ #expCounter = mySam.Counter() @@ -172,6 +227,24 @@ def testInterval(interval,expHandle,ctlHandle,alpha): return cumBinom(nExp,nCtl*alpha),nExp,nCtl*alpha def testIntervalNoStrand(interval,expHandle,ctlHandle,alpha): + """Tests a single genomic interval for read enrichment ignoring strand. + + Counts all reads (both strands) overlapping the interval from experimental + and control BAM files, scales control count by alpha, and returns a + binomial p-value. + + Args: + interval: An intervallib.Interval object with chr, start, and end + attributes. 
+ expHandle: A pysam AlignmentFile for the experimental sample. + ctlHandle: A pysam AlignmentFile for the control sample. + alpha: Normalisation factor used to scale control counts. + + Returns: + A tuple (pVal, nExp, adjCtl) where pVal is the binomial p-value, + nExp is the raw experimental read count, and adjCtl is the + alpha-scaled control read count. + """ expCounter = mySam.Counter() ctlCounter = mySam.Counter() expFetch = expHandle.fetch(interval.chr,interval.start,interval.end,callback=expCounter) @@ -235,19 +308,56 @@ def poissonProb(lamb,height): ######################### def slope(xarray,yarray): - """Uses numpy, in fact assumes that the list arguments are numpy arrays.""" + """Computes the slope of the ordinary least-squares regression line. + + Uses numpy arrays for efficient computation. The slope is: + m = (n*sum(x*y) - sum(x)*sum(y)) / (n*sum(x^2) - (sum(x))^2) + + Args: + xarray: A numpy array of x (independent variable) values. + yarray: A numpy array of y (dependent variable) values of the same + length as xarray. + + Returns: + The slope of the linear regression line (float). + """ n = float(len(xarray)) m = (n*sum(xarray*yarray)-sum(xarray)*sum(yarray))/(n*sum(xarray**2)-(sum(xarray))**2) return m def intercept(xarray,yarray): - """Uses numpy, in fact assumes that the list arguments are numpy arrays.""" + """Computes the y-intercept of the ordinary least-squares regression line. + + Uses numpy arrays for efficient computation. The intercept is: + b = (sum(y) - m*sum(x)) / n + + Args: + xarray: A numpy array of x (independent variable) values. + yarray: A numpy array of y (dependent variable) values of the same + length as xarray. + + Returns: + The y-intercept of the linear regression line (float). + """ m = slope(xarray,yarray) n = float(len(xarray)) b = (sum(yarray)-m*(sum(xarray)))/n return b def getSegmentCounts(bamHandle,segSize=10000): + """Counts reads in fixed-size genomic bins across all chromosomes in a BAM file. 
+
+    Iterates over all chromosomes and divides each into bins of segSize base
+    pairs, counting the total number of reads per bin using mySam.Counter.
+
+    Args:
+        bamHandle: A pysam AlignmentFile opened for reading.
+        segSize: Bin size in base pairs (default: 10000).
+
+    Returns:
+        A numpy array of read counts, one element per bin, ordered by
+        chromosome then genomic position.
+    """
     chrs = bamHandle.references
     chr_lengths = bamHandle.lengths
     bins = numpy.zeros(sum(chr_lengths)//segSize+len(chrs))
@@ -263,11 +373,43 @@
     return bins
 
 def getNonZeroIndices(bins1,bins2):
+    """Returns the indices of bins that have non-zero counts in both arrays.
+
+    Used to restrict linear regression normalisation to bins that are
+    informative in both the experimental and control samples.
+
+    Args:
+        bins1: A numpy array of read counts (e.g. experimental sample bins).
+        bins2: A numpy array of read counts (e.g. control sample bins) of
+            the same length as bins1.
+
+    Returns:
+        A list of integer indices where both bins1 and bins2 have non-zero
+        values.
+    """
     set1 = set(numpy.nonzero(bins1)[0])
     set2 = set(numpy.nonzero(bins2)[0])
    return list(set1.intersection(set2))
 
 def getAlpha(expBins,ctlBins,index):
+    """Computes the normalisation factor (alpha) between experimental and control samples.
+
+    Fits an ordinary least-squares linear regression on the subset of bins
+    specified by index, treating control counts as x and experimental counts
+    as y. The slope is used to scale the control sample to the experimental
+    library size. (Note: slope fits with an intercept, not through the origin.)
+
+    Args:
+        expBins: Numpy array of per-bin read counts for the experimental
+            sample.
+        ctlBins: Numpy array of per-bin read counts for the control sample.
+        index: List of integer indices identifying informative bins (non-zero
+            in both arrays).
+
+    Returns:
+        Alpha (float): the slope of the linear regression, used as the
+        multiplicative scaling factor for control counts. 
+ """ return slope(ctlBins[index],expBins[index]) def getAlphaFromLinReg(exp,ctl,r): @@ -304,10 +446,35 @@ def getAlphaFromLinReg(exp,ctl,r): ''' class Usage(Exception): + """Exception raised for command-line usage errors in seqstats. + + Attributes: + msg: Human-readable explanation of the error or the help message. + """ def __init__(self, msg): + """Initialises a Usage exception. + + Args: + msg: Human-readable error or help text. + """ self.msg = msg def newMain(argv=None): + """Command-line entry point for the seqstats peak-calling pipeline. + + Parses command-line options and delegates to smRNApeakSeq. Supports + optional strand-specific counting, q-value filtering, and verbose output. + + Args: + argv: List of command-line argument strings. Defaults to sys.argv + when None. + + Returns: + 2 on usage error, None on success. + + Raises: + SystemExit: Indirectly via sys.exit() on usage error. + """ if argv is None: argv = sys.argv try: diff --git a/src/seqlib/sequencelib.py b/src/seqlib/sequencelib.py index 9071876..aa4c77e 100644 --- a/src/seqlib/sequencelib.py +++ b/src/seqlib/sequencelib.py @@ -1,4 +1,10 @@ #/usr/bin/env python +"""Sequence utility functions for DNA/RNA analysis. + +Provides parsers, generic sequence tools, and motif tools for working +with biological sequence data including FASTA parsing, complement +computation, GC content, k-mer analysis, and random sequence generation. +""" import math import operator import random @@ -11,11 +17,26 @@ #Parsers ###### def FastaIterator(handle): - """ - Generator function to iterate over fasta records in : - Use in a loop to apply to each Seq record contained in a .fasta file - Input: record handle as obtained by handle = open(,'r') - Returns an iterator across Sequences in file + """Iterate over FASTA records in an open file handle. + + Skips any header text before the first '>' character, then yields + one record dict per FASTA entry. 
Each sequence has internal + whitespace stripped and lines joined into a single string. + + Args: + handle: A readable file object (e.g. opened with ``open(path, 'r')``) + positioned at or before the first FASTA record. + + Yields: + A dict with keys: + ``'name'``: The record header string (everything after ``>`` + on the header line, whitespace-stripped). + ``'sequence'``: The concatenated sequence string with all + internal spaces removed. + + Raises: + ValueError: If a record block does not begin with a ``>`` + character as required by the FASTA format. """ #Skip any header text while True: @@ -49,6 +70,24 @@ def FastaIterator(handle): ### def complement(s): + """Return the base-by-base complement of a DNA sequence as a list. + + Handles both upper- and lower-case input characters. Note that the + lower-case mapping contains a known quirk: ``'c'`` maps to ``'t'`` + instead of ``'g'``. + + Args: + s: An iterable of single-character DNA bases (``A``, ``T``, ``G``, + ``C`` in either case). + + Returns: + A list of single-character strings representing the complemented + bases in the same order as the input. + + Raises: + KeyError: If a character in ``s`` is not present in the complement + lookup table. + """ comp = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A', 'a': 't', 'c': 't', 'g': 'c', 't': 'a' } @@ -56,27 +95,103 @@ def complement(s): return complseq def reverse_complement(s): + """Return the reverse complement of a DNA sequence string. + + Reverses the sequence and then complements each base using + :func:`complement`. + + Args: + s: A DNA sequence string containing bases ``A``, ``T``, ``G``, ``C`` + (upper or lower case). + + Returns: + A string that is the reverse complement of ``s``. + """ seq = list(s) seq.reverse() return ''.join(complement(seq)) def rcomp(s): - """Does same thing as reverse_complement only cooler""" + """Return the reverse complement of an uppercase DNA string. 
+ + Uses ``str.translate`` with a precomputed translation table for + ``A<->T`` and ``C<->G``, then reverses the result with a slice. + Equivalent to :func:`reverse_complement` but operates only on + uppercase bases via the translation table. + + Args: + s: An uppercase DNA sequence string (``A``, ``T``, ``C``, ``G``). + + Returns: + A string that is the reverse complement of ``s``. + """ return s.translate(string.maketrans("ATCG","TAGC"))[::-1] def getTm(seq): + """Calculate the melting temperature (Tm) of a DNA sequence. + + Uses the nearest-neighbour-inspired empirical formula:: + + Tm = 79.8 + 18.5*log10([Na+]) + 58.4*GC + 11.8*GC^2 - 820/len + + where ``[Na+]`` is fixed at 0.05 M and ``GC`` is the fractional GC + content of the sequence. + + Args: + seq: A DNA sequence string. + + Returns: + The estimated melting temperature in degrees Celsius as a float. + """ Tm = 79.8 + 18.5*math.log10(0.05) + (58.4 * getGC(seq)) + (11.8 * getGC(seq)**2) - (820/len(seq)) return Tm def getGC(seq): + """Return the fractional GC content of a DNA sequence. + + Counts both upper- and lower-case ``G`` and ``C`` characters and + divides by the total sequence length. + + Args: + seq: A DNA sequence string. + + Returns: + A float in [0, 1] representing the proportion of G and C bases. + """ return (seq.count('C')+seq.count('G')+seq.count('c')+seq.count('g'))/float(len(seq)) def gc_content(seq): + """Return the percentage GC content of a nucleotide sequence. + + Counts G and C characters (upper and lower case) and divides by the + sum of all A, T, U, G, C characters (upper and lower case), ignoring + any ambiguity codes or gap characters. The result is scaled to + percentage (0–100). + + Args: + seq: A DNA or RNA sequence string. + + Returns: + A float representing GC content as a percentage (0–100). + """ gc = mcount(seq, 'GCgc') at = mcount(seq, 'ATUatu') return 100*gc/float((gc+at)) def mcount(s, chars): + """Count all occurrences of any character in ``chars`` within string ``s``. 
+ + Iterates over each character in ``chars`` and accumulates the count of + its appearances in ``s`` using ``string.count``. + + Args: + s: The string to search within. + chars: A string whose individual characters are each counted in ``s``. + + Returns: + The total number of occurrences of any character from ``chars`` + found in ``s`` as an integer. + """ # sums the counts of appearances of each char in chars count = 0 for char in chars: @@ -84,6 +199,26 @@ def mcount(s, chars): return count def prob_seq(seq, pGC=.5): + """Return the probability of a DNA sequence under a background GC model. + + Assumes each position is independently drawn from a 4-letter alphabet + where G and C each have probability ``pGC/2`` and A and T each have + probability ``(1-pGC)/2``. The joint probability is the product of + per-position probabilities. + + Args: + seq: A DNA sequence string containing only ``A``, ``T``, ``G``, + or ``C`` characters (upper case). + pGC: The background GC probability in [0, 1]. Defaults to 0.5. + + Returns: + The probability of observing ``seq`` under the model as a float. + + Raises: + AssertionError: If ``pGC`` is outside [0, 1]. + ValueError: If ``seq`` contains a character other than + ``A``, ``T``, ``G``, or ``C``. + """ # given a GC content, what is the probability # of getting the particular sequence @@ -98,10 +233,36 @@ def prob_seq(seq, pGC=.5): return reduce(operator.mul, ps, 1) def transcribe(seq): + """Transcribe a DNA sequence to RNA by replacing thymine with uracil. + + Performs a simple string substitution of every uppercase ``'T'`` + with ``'U'``. Lower-case ``'t'`` characters are not converted. + + Args: + seq: A DNA sequence string (upper case ``T`` will be replaced). + + Returns: + The RNA sequence string with all ``'T'`` characters replaced by + ``'U'``. + """ RNA = seq.replace('T', 'U') return RNA def GenRandomSeq(length, type='DNA'): + """Generate a random nucleotide sequence of a given length. 
+ + Each position is drawn uniformly and independently from the + four-letter alphabet appropriate for the requested sequence type. + + Args: + length: The number of nucleotides in the returned sequence. + type: The sequence type: ``'DNA'`` (alphabet ``A``, ``T``, ``G``, + ``C``) or ``'RNA'`` (alphabet ``A``, ``U``, ``G``, ``C``). + Defaults to ``'DNA'``. + + Returns: + A random nucleotide sequence string of the specified length. + """ if type == 'DNA': chars = ['A','T','G','C'] if type == 'RNA': @@ -109,9 +270,31 @@ def GenRandomSeq(length, type='DNA'): return ''.join([random.choice(chars) for i in range(length)]) def seed(): + """Re-seed the random number generator from the current system time. + + Calls :func:`random.seed` with no arguments, which uses the OS entropy + source or the current time as the seed. Useful for resetting + deterministic state after a fixed seed has been set elsewhere. + """ random.seed() def draw(distribution): + """Draw a random index from a discrete probability distribution. + + Iterates through the distribution, accumulating a running sum, and + returns the index of the first element whose cumulative sum exceeds + a uniformly drawn random number. + + Args: + distribution: A list of non-negative floats that sum to + approximately 1.0. Element ``i`` represents the probability + of returning index ``i``. + + Returns: + An integer index into ``distribution`` sampled according to the + distribution's probabilities, or ``None`` if no element was + selected (which can occur when probabilities do not sum to 1). + """ sum=0 r = random.random() for i in range(0,len(distribution)): @@ -120,6 +303,22 @@ def draw(distribution): return i def makeDistFromFreqs(freqs): + """Build a cumulative distribution list from a nucleotide frequency dict. + + Converts a dictionary of base frequencies into a list of cumulative + boundary values suitable for use with :func:`draw`. Bases are + processed in the fixed order ``A``, ``T``, ``C``, ``G``. 
+ + Args: + freqs: A dictionary mapping nucleotide characters (``'A'``, + ``'T'``, ``'C'``, ``'G'``) to their relative frequencies. + Values should be non-negative and sum to 1.0. + + Returns: + A list of five floats: the initial ``0.0`` followed by the + cumulative sum after adding each of ``A``, ``T``, ``C``, ``G`` + in that order. + """ res = [] chars = ['A','T','C','G'] cum = 0 @@ -130,8 +329,21 @@ def makeDistFromFreqs(freqs): return res def genRandomFromDist(length,freqs): - """Generates a random sequence of length 'length' drawing from a distribution of - base frequencies in a dictionary""" + """Generate a random DNA sequence drawn from a given base-frequency distribution. + + Builds a cumulative distribution from ``freqs`` and samples each + position independently using :func:`draw`. + + Args: + length: The number of nucleotides in the returned sequence. + freqs: A dictionary mapping nucleotide characters (``'A'``, + ``'T'``, ``'C'``, ``'G'``) to their probabilities. Values + should be non-negative and sum to 1.0. + + Returns: + A random DNA sequence string of the specified length, with each + base sampled proportionally to its frequency. + """ myDist = makeDistFromFreqs(freqs) chars = ['A','T','C','G'] return ''.join([chars[draw(myDist)] for i in range(length)]) @@ -140,6 +352,29 @@ def genRandomFromDist(length,freqs): #Motif Tools ########### def allindices(string, sub, listindex=[], offset=0): + """Find all start indices of substring ``sub`` within ``string``. + + Searches for non-overlapping occurrences of ``sub`` in ``string`` + starting from ``offset`` and appends each found index to + ``listindex``. + + Warning: + ``listindex`` uses a mutable default argument. Repeated calls + without explicitly passing a new list will accumulate results + across calls. + + Args: + string: The string to search within. + sub: The substring to search for. + listindex: A list to which found indices are appended. + Defaults to a shared mutable list (see warning above). 
+        offset: The character position at which to start the search.
+            Defaults to 0.
+
+    Returns:
+        The ``listindex`` list (same object passed in) with the start
+        positions of all occurrences of ``sub`` appended.
+    """
     i = string.find(sub, offset)
     while i >= 0:
         listindex.append(i)
@@ -147,6 +382,19 @@
     return listindex
 
 def find_all(seq, sub):
+    """Find all start positions of a substring within a sequence string.
+
+    Iterates through ``seq`` looking for non-overlapping occurrences of
+    ``sub`` using :func:`string.find` and collects each start index.
+
+    Args:
+        seq: The sequence string to search within.
+        sub: The substring to search for.
+
+    Returns:
+        A list of integer start positions (0-based) of all occurrences of
+        ``sub`` in ``seq``. Returns an empty list if ``sub`` is not found.
+    """
     #print "Looking for %s in %s"%(sub,seq)
     found = []
     next = string.find(seq,sub)
@@ -156,7 +404,25 @@
     return found
 
 def kmer_dictionary_counts(seq,k,dic={}):
-    """Returns a dictionary of k,v = kmer:'count of kmer in seq'"""
+    """Count k-mer occurrences in a sequence and store them in a dictionary.
+
+    Slides a window of width ``k`` across ``seq``; note the loop bound of
+    ``len(seq)-k`` means the final k-mer of ``seq`` is never counted.
+
+    Warning:
+        ``dic`` uses a mutable default argument. Repeated calls without
+        explicitly passing a fresh dict will accumulate counts across calls.
+
+    Args:
+        seq: The nucleotide (or any) sequence string to count k-mers in.
+        k: The length of each k-mer.
+        dic: A dictionary to update with k-mer counts. Defaults to a
+            shared mutable dict (see warning above).
+
+    Returns:
+        The updated ``dic`` dictionary mapping each k-mer string to its
+        occurrence count in ``seq``. 
+ """ for i in range(0, len(seq)-k): subseq = seq[i:][:k] #if not dic.has_key(subseq): dic[subseq] = 1 @@ -166,15 +432,53 @@ def kmer_dictionary_counts(seq,k,dic={}): return dic def kmer_dictionary(seq,k,dic={},offset=0): - """Returns dictionary of k,v = kmer:'list of kmer start positions in seq' """ + """Build a dictionary mapping each k-mer to its start positions in a sequence. + + Slides a window of width ``k`` across ``seq`` and records each + 1-based start position under the corresponding k-mer key. + + Warning: + ``dic`` uses a mutable default argument. Repeated calls without + passing a fresh dict will accumulate positions across calls. + + Args: + seq: The nucleotide (or any) sequence string to index. + k: The length of each k-mer. + dic: A dictionary to update with k-mer position lists. Defaults + to a shared mutable dict (see warning above). + offset: Unused parameter retained for API compatibility. + + Returns: + The updated ``dic`` dictionary mapping each k-mer string to a list + of 1-based integer start positions at which it occurs in ``seq``. + """ for i in range(0,len(seq)-k): subseq = seq[i:][:k] dic.setdefault(subseq,[]).append(i+1) return dic def kmer_stats(kmer,dic,genfreqs): - """Takes as argument a kmer string, a dictionary with kmers as keys from kmer_dictionary_counts, and a dictionary - of genomic frequencies with kmers as keys. Returns a dictionary of stats for kmer ("Signal2Noise Ratio, Z-score") + """Compute enrichment statistics for a k-mer relative to genomic background. + + Calculates the signal-to-noise ratio (SNR) and Z-score for the + observed count of ``kmer`` in a sequence compared to the count + expected under a genomic-frequency background model. + + The expected count is ``sum(dic.values()) * genfreqs[kmer]``. + + Args: + kmer: The k-mer string to evaluate. + dic: A dictionary mapping k-mer strings to their observed counts, + as returned by :func:`kmer_dictionary_counts`. 
+ genfreqs: A dictionary mapping k-mer strings to their expected + background frequencies (floats summing to 1 across all k-mers + of that length). + + Returns: + A dict with keys ``'snr'`` (signal-to-noise ratio) and + ``'zscore'`` (Z-score) if ``kmer`` is present in both ``dic`` and + ``genfreqs``. Returns ``None`` if ``dic`` is empty or ``kmer`` + is absent from either dictionary. """ if not dic: return if kmer in dic.keys() and kmer in genfreqs.keys(): @@ -186,6 +490,28 @@ def kmer_stats(kmer,dic,genfreqs): else: return def get_seeds(iter,seeds={}): + """Collect and count 7-mer seeds from an iterable of sequence records. + + Iterates over sequence records, converts each from colorspace to DNA + (by calling ``CSToDNA()`` on each record), extracts a 7-base seed + from positions 1–7 (1-based, i.e. ``sequence[1:8]``), and counts the + occurrences of each seed. Prints progress every 10 000 records. + + Warning: + ``seeds`` uses a mutable default argument. Repeated calls + without passing a fresh dict will accumulate counts across calls. + + Args: + iter: An iterable of sequence-record objects. Each object must + have a ``sequence`` attribute and a ``CSToDNA()`` method that + converts colorspace encoding to DNA in-place. + seeds: A dictionary to update with seed counts. Defaults to a + shared mutable dict (see warning above). + + Returns: + The updated ``seeds`` dictionary mapping 7-mer seed strings to + their occurrence counts. + """ counter = 0 for i in iter: counter+=1 diff --git a/src/seqlib/shrimp.py b/src/seqlib/shrimp.py index 9dd637d..b328518 100644 --- a/src/seqlib/shrimp.py +++ b/src/seqlib/shrimp.py @@ -1,4 +1,13 @@ #!/usr/bin/python +"""Utilities for running and parsing SHRiMP colorspace short-read alignments. + +Provides classes and functions for preparing input files, submitting jobs to +LSF, parsing SHRiMP v1.1+ alignment output, and parsing probcalc statistical +output for colorspace (SOLiD) short-read sequencing data. 
+ +SHRiMP (Short Read Mapping Package) aligns colorspace reads from the Applied +Biosystems SOLiD platform to a reference genome. +""" import glob import os import random @@ -25,9 +34,43 @@ order = ["readname","contigname","strand","contigstart","contigend","readstart","readend","readlength","score","editstring","readsequence"] ####################### class ShrimpRead(Alignment): - """Extends Alignment class to include a few SHRiMP-specific attributes and methods""" - + """Extends Alignment class to include SHRiMP-specific attributes and methods. + + Represents a single read alignment produced by the SHRiMP rmapper-cs + aligner. In addition to the base Alignment attributes, stores colorspace + edit string information and counts of mismatches and crossover events. + + Attributes: + readstart: 0-based start position within the read. + readend: End position within the read. + readcount: Number of times this read sequence was observed. + editstring: SHRiMP edit string describing mismatches and crossovers + relative to the reference. + readlength: Length of the read in bases. + crossovers: Number of colorspace crossover errors ('x') in the edit + string. + nSNPs: Number of apparent SNP calls (A/C/G/T substitutions) in the + edit string. + aligner: Always 'shrimp' for this class. + """ + def __init__(self,readname,chr,start,end,strand,readstart,readend,score,readcount,readsequence,editstring,readlength): + """Initialises a ShrimpRead from parsed SHRiMP alignment fields. + + Args: + readname: Unique identifier (nuID or read name) of the read. + chr: Chromosome name of the alignment target. + start: 0-based genomic start coordinate. + end: Genomic end coordinate. + strand: Strand orientation ('+' or '-'). + readstart: Start position within the read sequence. + readend: End position within the read sequence. + score: Alignment score from SHRiMP. + readcount: Number of times this read was observed (from nuID). + readsequence: The read sequence string. 
+ editstring: SHRiMP edit/cigar string. + readlength: Length of the read. + """ Alignment.__init__(self,readname,chr,start,end,strand,score,readcount,readsequence) self.readstart = int(readstart) self.readend = int(readend) @@ -41,17 +84,58 @@ def __init__(self,readname,chr,start,end,strand,readstart,readend,score,readcoun self.aligner = "shrimp" def __len__(self): + """Returns the length of the read.""" return self.readlength - + def __str__(self): + """Returns a short string representation: 'SHRiMP:readname:chr:start:end'.""" return "SHRiMP:%s:%s:%d:%d" % (self.readname,self.chr,self.start,self.end) - + def shrimpString(self): + """Returns the alignment formatted as a SHRiMP output line. + + The returned string matches the tab-delimited format produced by + SHRiMP, beginning with '>readsequence_xreadcount'. + + Returns: + A SHRiMP-format alignment string ending with a newline. + """ return ">%s_x%d\t%s\t%s\t%d\t%d\t%d\t%d\t%d\t%d\t%s\t%s\n" % (self.readsequence,self.readcount,self.chr,self.strand,self.start,self.end,self.readstart,self.readend,self.readlength,self.score,self.editstring,self.readsequence) class ProbCalcRead(ShrimpRead): - """Extends ShrimpRead class to include statistical output from probcalc""" + """Extends ShrimpRead to include statistical scores from the SHRiMP probcalc utility. + + The probcalc utility assigns probabilistic scores to SHRiMP alignments + to help distinguish true genomic mappings from chance alignments. In + addition to ShrimpRead attributes, this class stores the normalised odds + ratio and two probability scores. + + Attributes: + normodds: Normalised odds ratio for this alignment. + pgenome: Probability that the read originated from the genome. + pchance: Probability that the alignment is due to chance. + """ def __init__(self,readname,chr,start,end,strand,readstart,readend,score,readcount,editstring,readlength,normodds,pgenome,pchance,readsequence=''): + """Initialises a ProbCalcRead from probcalc output fields. 
+ + Args: + readname: nuID-encoded read name; the read sequence is decoded + from this via misc.nuID2seq. + chr: Chromosome name of the alignment target. + start: 0-based genomic start coordinate. + end: Genomic end coordinate. + strand: Strand orientation ('+' or '-'). + readstart: Start position within the read. + readend: End position within the read. + score: SHRiMP alignment score. + readcount: Observation count encoded in the read name. + editstring: SHRiMP edit string. + readlength: Length of the read. + normodds: Normalised odds ratio from probcalc. + pgenome: Probability the read originates from the genome. + pchance: Probability of a chance alignment. + readsequence: Optional read sequence string (default: ''). + """ ShrimpRead.__init__(self,readname,chr,start,end,strand,readstart,readend,score,readcount,readsequence,editstring,readlength) self.readsequence = misc.nuID2seq(self.readname) self.normodds = float(normodds) @@ -78,6 +162,16 @@ def prepShrimp(file,basedir,binSize=1000): os.chdir(curDir) def GenRandom(length = 10, chars=string.letters+string.digits): + """Generates a random alphanumeric string of the given length. + + Args: + length: Number of characters in the returned string (default: 10). + chars: Pool of characters to sample from (default: all ASCII letters + and digits). + + Returns: + A random string of the specified length drawn from chars. + """ return ''.join([random.choice(chars) for i in range(length)]) def submitShrimp(queue="broad",cwd = os.getcwd(),outDir="../results/",readLength=25): diff --git a/src/seqlib/smRNA.py b/src/seqlib/smRNA.py index e93e0f6..a6dae22 100644 --- a/src/seqlib/smRNA.py +++ b/src/seqlib/smRNA.py @@ -1,12 +1,14 @@ #!/usr/bin/env python -''' -Created on Oct 8, 2009 -Generates list of candidate siRNAs from .fasta sequence given as argument +"""Tools for designing small RNA molecules including siRNAs, dsRNAs, and ASOs. 
-@author: lgoff +Generates and scores candidate siRNA sequences from FASTA input according to +published design rules for RNA interference (RNAi). Also includes support for +RNA activation (RNAa) dsRNA design based on Vera et al. criteria, and +antisense oligonucleotide (ASO) scanning. -Reference: http://www.protocol-online.org/prot/Protocols/Rules-of-siRNA-design-for-RNA-interference--RNAi--3210.html -''' +Reference: + http://www.protocol-online.org/prot/Protocols/Rules-of-siRNA-design-for-RNA-interference--RNAi--3210.html +""" import math import sys @@ -14,7 +16,15 @@ def main(fastaFile): - """Do it all""" + """Runs the full siRNA candidate pipeline on a FASTA file. + + Opens the FASTA file, iterates over each record, and prints candidate + siRNA sequences with their scores to stdout using evaluateSequence. + + Args: + fastaFile: Path to a FASTA-format file containing one or more + nucleotide sequences to screen for siRNA candidates. + """ handle = open(fastaFile,'r') iter = sequencelib.FastaIterator(handle) for i in iter: @@ -22,7 +32,17 @@ def main(fastaFile): evaluateSequence(i["sequence"]) def evaluateSequence(seq,scoreCutoff=6): - """Wrapper for testCandidate() that iterates across sequence provided and returns candidates with a score >= scoreCutoff (default = 6)""" + """Scans a nucleotide sequence for siRNA candidates meeting a score threshold. + + Slides a 21-nt window across the sequence, scores each window with + testCandidate, and prints passing candidates together with their BlockIt + insert sequences. + + Args: + seq: Nucleotide sequence string to scan. + scoreCutoff: Minimum score (inclusive) for a 21-mer to be reported + (default: 6). 
+ """ for i in range(0,len(seq)-21): candidate = seq[i:i+21] score = testCandidate(candidate) @@ -32,7 +52,27 @@ def evaluateSequence(seq,scoreCutoff=6): print("Fwd:%s\tRev:%s" % (insertSeqs[0],insertSeqs[1])) def testCandidate(seq): - """Checks 21mer candidates against siRNA rules and assigns a score on a scale of 0-8""" + """Scores a 21-mer siRNA candidate against established siRNA design rules. + + Evaluates the 21-nt sense strand against the following criteria: + 1. Moderate GC content (30-52%) — +1 point. + 2. At least 3 A/U nucleotides at positions 15-19 — +1 per A/U (up to +4). + 3. Lack of internal repeats (melting temperature < 20 °C) — +1 point. + 4. 'A' at position 19 — +1 point. + 5. 'A' at position 3 — +1 point. + 6. 'U' (T in DNA) at position 10 — +1 point. + 7. G or C at position 19 — -1 point. + 8. 'G' at position 13 — -1 point. + 9. Homopolymer run of 4 or more identical bases — -5 points per run. + + Args: + seq: A 21-nucleotide DNA-sequence string representing the siRNA sense + strand (case-insensitive; T is used in place of U). + + Returns: + Numeric score (float) on an approximate scale of 0-8. Returns False + if the sequence is not exactly 21 nt. + """ #seq = seq.upper() if len(seq)!=21: assert ValueError("Candidate is not 21nt in length") @@ -72,10 +112,31 @@ def testCandidate(seq): return score def getTm(seq): + """Calculates the melting temperature (Tm) of a nucleotide sequence. + + Uses an empirical formula suitable for oligonucleotides to estimate the + melting temperature in degrees Celsius assuming a salt concentration of + 50 mM: + Tm = 79.8 + 18.5*log10([Na+]) + 58.4*GC + 11.8*GC^2 - 820/len + + Args: + seq: A nucleotide sequence string. + + Returns: + Estimated melting temperature in degrees Celsius (float). + """ Tm = 79.8 + 18.5*math.log10(0.05) + (58.4 * getGC(seq)) + (11.8 * getGC(seq)**2) - (820/len(seq)) return Tm def getGC(seq): + """Calculates the GC content of a nucleotide sequence. 
+ + Args: + seq: A nucleotide sequence string (case-insensitive). + + Returns: + GC fraction as a float between 0.0 and 1.0. + """ seq = seq.upper() return (seq.count('C')+seq.count('G'))/float(len(seq)) @@ -83,8 +144,29 @@ def getGC(seq): #dsRNA rules from Vera et al. (updated 2-1-10) ###### def scanPromoter(promSeq): - """ - Evaluates candidate dsRNAs for RNAa from a given sequence. Returns a list of dictionaries of candidates and their score. + """Scans a promoter sequence for RNA activation (RNAa) dsRNA candidates. + + Slides a 19-nt window across the promoter sequence and scores each window + against design rules derived from Vera et al. for small activating RNA + (saRNA) design. Scoring rules include: + - GC content 40-65%: +1 point. + - Homopolymer run of 4 or more bases: -5 points per run. + - 'A' at position 19: +1 point. + - G or C at position 19: -1 point. + - 'A' at position 18: +2 points; 'T' at position 18: +1 point. + - 'T' at position 7: +1 point. + - 3 or more A/T nucleotides at positions 20-23 (3' flank): bonus points. + - Tm < 20 °C (low internal repeats): +1 point. + + Args: + promSeq: Promoter DNA/RNA sequence string to scan (case-insensitive; + converted to uppercase internally). + + Returns: + A list of candidate dictionaries sorted by descending score. Each + dictionary contains: 'seq' (19-nt candidate), 'pos' (position relative + to 3' end of promSeq), 'gc' (GC fraction), 'score' (float), and + 'Tm' (melting temperature in °C). """ promSeq = promSeq.upper() window = 19 @@ -141,8 +223,23 @@ def scanPromoter(promSeq): return sorted(candidates,key=lambda k: k['score'],reverse=True) def ASOscan(targetSeq): - """ - Evaluates candidate dsRNAs for RNAa from a given sequence. Returns a list of dictionaries of candidates and their score. + """Scans a target RNA sequence for antisense oligonucleotide (ASO) candidates. + + Reverse-complements the input sequence and slides a 20-nt window across + it to evaluate ASO design candidates. 
Each candidate is scored primarily + on GC content (45-65% preferred, +2 points) and melting temperature + (Tm > 45 °C preferred, +2 points), with penalties for homopolymer runs + of 4 or more bases (-5 points each). + + Args: + targetSeq: The target RNA/DNA sequence string (sense strand) to + design ASOs against. It is reverse-complemented internally. + + Returns: + A list of candidate dictionaries sorted by descending score, each + containing keys: 'seq' (20-nt candidate sequence), 'pos' (position + relative to 3' end of input), 'gc' (GC fraction), 'score' (float + total score), and 'Tm' (melting temperature in °C). """ targetSeq = sequencelib.rcomp(targetSeq) window = 20 @@ -200,6 +297,20 @@ def ASOscan(targetSeq): return sorted(candidates,key=lambda k: k['score'],reverse=True) def makeDsRNA(seq): + """Formats a 19-nt RNA sequence as a dsRNA oligonucleotide pair with TT 3' overhangs. + + Produces the sense and antisense strands in a format suitable for ordering + RNA oligonucleotides. Each nucleotide is prefixed with 'r' and the + sequence is terminated with a 'TT' 3' overhang. + + Args: + seq: A 19-nucleotide DNA sequence string representing the sense strand. + + Returns: + A list of two strings: [sense_strand_oligo, antisense_strand_oligo], + each formatted as individual 'r'-prefixed RNA nucleotides followed by + 'TT'. Returns False if the sequence is not exactly 19 nt. + """ if len(seq)!=19: assert ValueError("Candidate is not 19nt in length") return False @@ -208,7 +319,16 @@ def makeDsRNA(seq): return ["r"+"r".join(seq)+"TT","r"+"r".join(revSeq)+"TT"] def veraMain(fastaFile): - """Do it all""" + """Runs the full RNA activation (RNAa) dsRNA design pipeline on a FASTA file. + + Opens a FASTA file of promoter sequences, scans each sequence for RNAa + dsRNA candidates using scanPromoter, and prints the top 10 results with + their positions, sequences, scores, melting temperatures, GC fractions, + and formatted oligonucleotide sequences. 
+ + Args: + fastaFile: Path to a FASTA file of promoter sequences to scan. + """ handle = open(fastaFile,'r') iter = sequencelib.FastaIterator(handle) for i in iter: @@ -219,7 +339,18 @@ def veraMain(fastaFile): print("Pos:\t%d\nCandidate:\t%s\nScore:\t%.2f\nTm:\t%.2f\nGC:\t%.2f\nFwd:\t%s\nRev:\t%s\n------------------------" % (candidate['pos'],candidate['seq'],candidate['score'],candidate['Tm'],candidate['gc'],dsRNA[0],dsRNA[1])) def ASOMain(fastafile): - """Takes a fasta sequnce of RNAs, reverse-complements and scans for ASO sequences""" + """Runs the full ASO design pipeline on a FASTA file of RNA sequences. + + Opens the FASTA file, reverse-complements each record, scans for + antisense oligonucleotide (ASO) candidates using ASOscan, and prints the + top 10 uppercase candidates with their positions, sequences, scores, + melting temperatures, and GC fractions. Candidates containing lowercase + letters (ambiguous bases) are skipped. + + Args: + fastafile: Path to a FASTA file of RNA/DNA sequences for which ASOs + should be designed. + """ handle = open(fastafile,'r') iter = sequencelib.FastaIterator(handle) for i in iter: diff --git a/src/seqlib/solid.py b/src/seqlib/solid.py index da0cdef..9775c97 100644 --- a/src/seqlib/solid.py +++ b/src/seqlib/solid.py @@ -1,4 +1,15 @@ #!/usr/bin/python +"""Utilities for processing Applied Biosystems SOLiD colorspace sequencing data. + +Provides the CSSeq class for representing colorspace sequences, iterators for +reading .csfasta and .qual files, and functions for converting between +colorspace and DNA space, trimming linker sequences, building unique-read +tables, and generating FASTQ files compatible with Bowtie. + +SOLiD sequencing encodes each base as a color (0-3) that represents the +transition between successive dinucleotides. The first character of each +read is a nucleotide seed; subsequent characters are color codes. 
+""" import os import sys @@ -31,8 +42,33 @@ def linker_oligos(linker = P2_seq): #CSSeq Class definition: Basic class of Colorspace sequence ################################################################# class CSSeq: - "Defines the basic sequence class for the pipeline (DNA or CS)" + """Represents a single SOLiD colorspace (or DNA-space) sequence read. + + Holds the sequence data, quality scores, and alignment metadata for one + SOLiD bead read. The sequence may be in colorspace (space='CS') or may + have been converted to DNA space (space='DNA') via CSToDNA(). + + Attributes: + name: Read identifier string (bead name). + sequence: Sequence string; either colorspace (digits 0-3 prefixed by + a nucleotide) or DNA (ACGT) depending on space. + readcount: Number of times this read sequence was observed (used when + collapsing duplicates to a unique table, default 1). + matches: List of match location strings (populated when parsing a + .csfasta file with match annotations). + qual: List of integer Phred quality scores corresponding to each base. + space: Either 'CS' (colorspace, default) or 'DNA' after CSToDNA(). + trimmed: True once the SOLiD linker has been stripped by + strip_solid_linker(). + """ def __init__(self,name,sequence,readcount=1): + """Initialises a CSSeq. + + Args: + name: Read identifier (bead name). + sequence: Colorspace sequence string. + readcount: Observation count for this sequence (default: 1). 
+ """ self.name = name self.sequence = sequence self.readcount = readcount @@ -43,11 +79,15 @@ def __init__(self,name,sequence,readcount=1): #self.count = 0 def __len__(self): + """Returns the length of the sequence string.""" return len(self.sequence) def __str__(self): + """Returns the sequence string.""" return self.sequence + def __repr__(self): + """Returns the read name.""" return self.name # def __repr__(self): @@ -62,15 +102,31 @@ def __repr__(self): # return ('%s\t%s\t%s\t' % (self.name,CSseq,self.sequence)) def returnFasta(self): + """Returns the sequence formatted as a two-line FASTA record string. + + Returns: + A string of the form '>name\\nsequence'. + """ return ('>%s\n%s' % (self.name,self.sequence)) def returnSHRiMPcsfasta(self): + """Returns the sequence in SHRiMP csfasta format with the readcount suffix. + + Returns: + A string of the form '>name_xreadcount\\nsequence'. + """ return ('>%s_x%d\n%s') % (self.name,self.readcount,self.sequence) def returnQual(self): + """Returns the quality scores formatted as a two-line FASTA-style qual record. + + Returns: + A string of the form '>name\\nq0 q1 q2 ...'. + """ return('>%s\n%s' % (self.name," ".join(q for q in self.qual))) def printFasta(self): + """Prints the sequence as a two-line FASTA record to stdout.""" print ('>%s\n%s' % (self.name,self.sequence)) def CSToDNA(self): @@ -127,6 +183,12 @@ def trim_by_qual(self,phredCutoff=10): return def nuIDName(self): + """Replaces the read name with the nuID encoding of its DNA sequence. + + Converts the sequence to DNA space first if it is currently in + colorspace, then encodes it as a nuID and stores the result in + self.name. 
+ """ if self.space == "CS": tempString = CS2DNA(self.sequence) else: @@ -229,6 +291,20 @@ def CompIter(csfile,qualfile): assert ValueError ("It appears that the sequences don't match...have you modified the .csfasta or .qual files?") def uniqueTableIterator(handle,trim=True): + """Yields CSSeq objects from a tab-delimited unique-reads table. + + Reads a two-column tab-delimited file where column 0 is a colorspace + read sequence and column 1 is its observation count. Assigns nuID names + and optionally strips the SOLiD linker. + + Args: + handle: Readable file-like object containing the unique-reads table. + trim: If True (default), strip the SOLiD P2 linker from each read + using strip_solid_linker(). + + Yields: + CSSeq objects with nuID names and readcount set from the table. + """ for line in handle: tokens = line.rstrip().split("\t") seq = CSSeq(tokens[0],tokens[0],readcount=int(tokens[1])) @@ -283,6 +359,16 @@ def makeFastq(csfile,qualfile,shortname,outdir="",split=-1,trim=False): ######################################################################## def csfasta2fasta(fname): + """Converts a .csfasta file to DNA-space FASTA format and prints to stdout. + + Reads each colorspace record, converts it to DNA space, and prints the + result as a FASTA record. Note: due to a bug the CS2DNA conversion and + printFasta are referenced as attributes rather than called, so conversion + does not actually occur. + + Args: + fname: Path to a .csfasta file. + """ handle=open(fname,'r') iter=CSFastaIterator(handle) for i in iter: @@ -338,9 +424,20 @@ def uniqueTable(dir=os.getcwd()): print(row) def filterUnique(uniqueFile,minObs=5): - """ - At this point, this function is specific to the H1U and H1NSC samples - I need to change that + """Filters a unique-reads table and writes separate .csfasta files per sample. + + Reads a tab-delimited unique-reads table and writes reads that meet the + minimum observation threshold to sample-specific .csfasta files. 
+ + Note: This function is hard-coded for exactly two samples (H1U and H1NSC) + and writes output to 'H1U.csfasta' and 'H1NSC.csfasta' in the current + directory. + + Args: + uniqueFile: Path to the tab-delimited unique-reads table produced by + uniqueTable(). The header line begins with '#'. + minObs: Minimum total observation count required for a read to be + written to the output (default: 5). """ handle = open(uniqueFile,'r') count = 0 diff --git a/src/seqlib/stats.py b/src/seqlib/stats.py index bed6b67..07db2e3 100644 --- a/src/seqlib/stats.py +++ b/src/seqlib/stats.py @@ -1,3 +1,11 @@ +"""Statistical and mathematical utilities for biological data analysis. + +Provides descriptive statistics, probability distributions (PDF and CDF), +random variates, regression, sliding-window operations, curve fitting, and +special mathematical functions. Functions that require external tools (rpy2, +gnuplot) fall back gracefully or raise ``NotImplementedError`` when those +dependencies are absent. +""" # python libs import cmath import os @@ -16,11 +24,35 @@ def prod(lst): - """Computes the product of a list of numbers""" + """Compute the product of a list of positive numbers via log-space summation. + + Calculates ``exp(sum(log(i) for i in lst))``, which avoids numerical + overflow for large lists by working in log space. All values in + ``lst`` must be strictly positive. + + Args: + lst: An iterable of strictly positive numbers. + + Returns: + The product of all elements in ``lst`` as a float. + """ return exp(sum(log(i) for i in lst)) def mean(vals): - """Computes the mean of a list of numbers""" + """Compute the arithmetic mean of a sequence of numbers. + + Iterates through ``vals`` once, accumulating the sum and count, + then divides to produce the mean. + + Args: + vals: An iterable of numeric values. Must be non-empty. + + Returns: + The arithmetic mean as a float. + + Raises: + ZeroDivisionError: If ``vals`` is empty. 
+ """ n = 0 s = 0.0 for i in vals: @@ -29,7 +61,17 @@ def mean(vals): return s / float(n) def median(vals): - """Computes the median of a list of numbers""" + """Compute the median of a list of numbers. + + Sorts ``vals`` and returns the middle value for odd-length lists, or + the average of the two middle values for even-length lists. + + Args: + vals: A sequence of numeric values. Must be non-empty. + + Returns: + The median value as a float. + """ lenvals = len(vals) sortvals = sorted(vals) @@ -39,7 +81,20 @@ def median(vals): return sortvals[lenvals // 2] def mode(vals): - """Computes the mode of a list of numbers""" + """Compute the mode (most frequently occurring value) of a sequence. + + Uses :class:`collections.Counter` to count occurrences and returns + the value with the highest count. If multiple values share the + maximum count, the one encountered first during dict iteration is + returned (which is insertion order in Python 3.7+). + + Args: + vals: An iterable of hashable values. + + Returns: + The most frequently occurring element in ``vals``, or ``None`` + if ``vals`` is empty. + """ top = 0 topkey = None for key, val in Counter(vals).items(): @@ -50,8 +105,22 @@ def mode(vals): def msqerr(vals1, vals2): - """Mean squared error""" + """Compute the mean squared error between two equal-length sequences. + Calculates the average of the squared element-wise differences:: + + MSE = mean((vals1[i] - vals2[i])^2 for all i) + + Args: + vals1: A sequence of numeric values. + vals2: A sequence of numeric values of the same length as ``vals1``. + + Returns: + The mean squared error as a float. + + Raises: + AssertionError: If ``vals1`` and ``vals2`` have different lengths. + """ assert len(vals1) == len(vals2), "lists are not the same length" @@ -61,23 +130,80 @@ def msqerr(vals1, vals2): def variance(vals): - """Variance""" + """Compute the sample variance of a sequence of numbers. 
+ + Uses Bessel's correction (divides by ``n - 1``) to produce an + unbiased estimate of the population variance:: + + s^2 = sum((x - mean)^2) / (n - 1) + + Args: + vals: A sequence of at least two numeric values. + + Returns: + The sample variance as a float. + + Raises: + ZeroDivisionError: If ``vals`` has fewer than 2 elements. + """ u = mean(vals) return sum((x - u)**2 for x in vals) / float(len(vals)-1) def var(vals): + """Alias for :func:`variance`. + + Args: + vals: A sequence of at least two numeric values. + + Returns: + The sample variance as a float. + """ return variance(vals) def sdev(vals): - """Standard deviation""" + """Compute the sample standard deviation of a sequence of numbers. + + Returns the square root of the sample variance computed by + :func:`variance` (Bessel-corrected, ``n - 1`` denominator). + + Args: + vals: A sequence of at least two numeric values. + + Returns: + The sample standard deviation as a float. + """ return sqrt(variance(vals)) def serror(vals): - """Stanadrd error""" + """Compute the standard error of the mean of a sequence of numbers. + + Divides the sample standard deviation by the square root of the + sample size:: + + SE = sdev(vals) / sqrt(n) + + Args: + vals: A sequence of at least two numeric values. + + Returns: + The standard error of the mean as a float. + """ return sdev(vals) / sqrt(len(vals)) def covariance(lst1, lst2): - """Covariance""" + """Compute the sample covariance between two equal-length sequences. + + Uses Bessel's correction (divides by ``n - 1``):: + + cov(X, Y) = sum((x - mean_x) * (y - mean_y)) / (n - 1) + + Args: + lst1: A sequence of numeric values. + lst2: A sequence of numeric values of the same length as ``lst1``. + + Returns: + The sample covariance as a float. + """ m1 = mean(lst1) m2 = mean(lst2) tot = 0.0 @@ -87,14 +213,37 @@ def covariance(lst1, lst2): def covmatrix(mat): - """Covariance Matrix""" + """Compute the full pairwise sample covariance matrix for a list of sequences. 
+ + Evaluates :func:`covariance` for every pair ``(i, j)`` of rows in + ``mat`` (including self-covariances on the diagonal, which equal the + sample variance of that row). + + Args: + mat: A list of ``n`` equal-length numeric sequences (rows). + + Returns: + A ``(n, n)`` NumPy array where element ``[i, j]`` is the sample + covariance between ``mat[i]`` and ``mat[j]``. + """ size = len(mat) flat = [covariance(mat[i], mat[j]) for i,j in ((i,j) for i in range(size) for j in range(size))] return np.array(flat).reshape(size, size) def corrmatrix(mat): - """Correlation Matrix""" + """Compute the full pairwise Pearson correlation matrix for a list of sequences. + + Evaluates :func:`corr` for every pair ``(i, j)`` of rows in + ``mat`` (including self-correlations of 1.0 on the diagonal). + + Args: + mat: A list of ``n`` equal-length numeric sequences (rows). + + Returns: + A ``(n, n)`` NumPy array where element ``[i, j]`` is the Pearson + correlation coefficient between ``mat[i]`` and ``mat[j]``. + """ size = len(mat) flat = [corr(mat[i], mat[j]) for i,j in ((i,j) for i in range(size) for j in range(size))] @@ -102,7 +251,23 @@ def corrmatrix(mat): def corr(lst1, lst2): - """Pearson's Correlation""" + """Compute the Pearson correlation coefficient between two sequences. + + Calculates:: + + r = cov(lst1, lst2) / (sdev(lst1) * sdev(lst2)) + + If the denominator is zero (one or both sequences have zero variance), + returns ``1e1000`` (effectively infinity) as a sentinel value. + + Args: + lst1: A sequence of numeric values. + lst2: A sequence of numeric values of the same length as ``lst1``. + + Returns: + The Pearson correlation coefficient as a float in [-1, 1], or + ``1e1000`` if either sequence has zero standard deviation. 
+ """ num = covariance(lst1, lst2) denom = float(sdev(lst1) * sdev(lst2)) if denom != 0: @@ -112,8 +277,26 @@ def corr(lst1, lst2): def qqnorm(data, plot=None): - """Quantile-quantile plot""" - + """Generate data for a normal quantile-quantile (Q-Q) plot. + + Sorts ``data`` and generates an equal-length sample from the standard + normal distribution (mean 0, sigma 1), also sorted. The two sorted + sequences can be plotted against each other to assess normality. + + Args: + data: A sequence of numeric values to compare against the normal + distribution. + plot: An optional plot object with a ``plot(x, y)`` method. If + provided, the Q-Q data are passed to ``plot.plot`` and the + plot object is returned. Defaults to ``None``. + + Returns: + If ``plot`` is ``None``: a 2-tuple ``(data2, norm)`` where + ``data2`` is the sorted input data and ``norm`` is a sorted + sample from N(0, 1) of the same length. + If ``plot`` is provided: the ``plot`` object after calling + ``plot.plot(data2, norm)``. + """ data2 = sorted(data) norm = [random.normalvariate(0, 1) for x in range(len(data2))] norm.sort() @@ -128,8 +311,25 @@ def qqnorm(data, plot=None): def fitLine(xlist, ylist): - """2D regression""" + """Fit a least-squares line to 2-D data and return slope and intercept. + + Uses the ordinary least-squares closed-form formula:: + slope = (sum(x*y) - n*mean_x*mean_y) / (sum(x^2) - n*mean_x^2) + inter = mean_y - slope * mean_x + + If the denominator is zero (all x values are identical), slope is set + to ``1e10`` as a sentinel for a vertical line. + + Args: + xlist: A sequence of x-coordinates (numeric). + ylist: A sequence of y-coordinates (numeric) of the same length + as ``xlist``. + + Returns: + A 2-tuple ``(slope, inter)`` where ``slope`` is the gradient and + ``inter`` is the y-intercept of the fitted line. 
+ """ xysum = 0 xxsum = 0 n = len(xlist) @@ -150,7 +350,23 @@ def fitLine(xlist, ylist): def fitLineError(xlist, ylist, slope, inter): - """Returns the Mean Square Error of the data fit""" + """Compute the mean squared error of a linear fit against data. + + Evaluates the fitted line ``y_hat = slope * x + inter`` at each x + and averages the squared residuals:: + + MSE = sum((slope*x_i + inter - y_i)^2) / n + + Args: + xlist: A sequence of x-coordinates (numeric). + ylist: A sequence of observed y-coordinates of the same length + as ``xlist``. + slope: The slope of the fitted line. + inter: The y-intercept of the fitted line. + + Returns: + The mean squared error of the linear fit as a float. + """ error = 0 n = len(xlist) @@ -160,8 +376,27 @@ def fitLineError(xlist, ylist, slope, inter): def pearsonsRegression(observed, expected): - """Pearson's coefficient of regression""" + """Compute the Pearson coefficient of determination (R^2). + Measures how well ``expected`` values explain the variance of + ``observed``:: + + R^2 = 1 - ESS / TSS + + where ``ESS = sum((observed - expected)^2)`` is the error sum of + squares and ``TSS = sum((observed - mean(observed))^2)`` is the + total sum of squares. + + Args: + observed: A sequence of observed (actual) numeric values. + expected: A sequence of predicted values of the same length as + ``observed``. + + Returns: + R^2 as a float. A value of 1.0 indicates a perfect fit; + values near 0 indicate no explanatory power; negative values + indicate the model is worse than predicting the mean. + """ # error sum of squares ess = sum((a - b)**2 for a, b in zip(observed, expected)) @@ -174,6 +409,22 @@ def pearsonsRegression(observed, expected): def pearsonsRegressionLine(x, y, m, b): + """Compute R^2 for data against a linear model y = m*x + b. + + Generates expected values from the line ``y = m*x + b`` and + delegates to :func:`pearsonsRegression`. + + Args: + x: A sequence of x-coordinates (numeric). 
+ y: A sequence of observed y-coordinates of the same length as + ``x``. + m: The slope of the reference line. + b: The y-intercept of the reference line. + + Returns: + R^2 as a float indicating goodness of fit of the linear model + to the observed data. + """ observed = y expected = [m*i + b for i in x] return pearsonsRegression(observed, expected) @@ -181,11 +432,27 @@ def pearsonsRegressionLine(x, y, m, b): def percentile(vals, perc, rounding=-1, sort=True): - """Give the value at a percentile - - rounding -- round down if -1 or round up for 1 + """Return the value at a given percentile of a sequence. + + Optionally sorts ``vals`` and returns the element at index + ``int(perc * n)`` (round down) or ``ceil(perc * n)`` (round up), + clamped to valid list indices. + + Args: + vals: A sequence of numeric values. + perc: The desired percentile as a fraction in [0, 1] (e.g. 0.5 + for the median, 0.95 for the 95th percentile). + rounding: Controls how the fractional index is resolved. + Use ``-1`` to floor (default) or ``1`` to ceiling. + sort: If ``True`` (default), sort ``vals`` before indexing. + Pass ``False`` if ``vals`` is already sorted to save time. + + Returns: + The value in ``vals`` at the requested percentile. + + Raises: + Exception: If ``rounding`` is not ``-1`` or ``1``. """ - if sort: vals2 = sorted(vals) else: @@ -200,8 +467,22 @@ def percentile(vals, perc, rounding=-1, sort=True): def logadd(lna, lnb): - """Adding numbers in log-space""" + """Add two numbers represented in log space without underflow. + + Computes ``log(exp(lna) + exp(lnb))`` in a numerically stable way:: + + logadd(lna, lnb) = log(exp(lna - lnb) + 1) + lnb + + When ``lna - lnb >= 500`` the second term is negligible and ``lna`` + is returned directly to avoid overflow. + + Args: + lna: The natural log of the first value. + lnb: The natural log of the second value. + Returns: + The natural log of the sum ``exp(lna) + exp(lnb)`` as a float. 
+ """ diff = lna - lnb if diff < 500: return log(exp(diff) + 1.0) + lnb @@ -211,13 +492,25 @@ def logadd(lna, lnb): def smooth(vals, radius): - """ - return an averaging of vals using a radius + """Smooth a sequence by replacing each value with a local window average. - Note: not implemented as fast as possible - runtime: O(len(vals) * radius) - """ + For each position ``i``, computes the mean of the sub-list + ``vals[i - r : i + r + 1]`` where ``r = min(i, vlen - i - 1, radius)`` + ensures the window stays within array bounds. Values near the + edges therefore use a smaller effective radius. + + Note: + Not implemented as fast as possible. + Runtime is O(len(vals) * radius). + Args: + vals: A sequence of numeric values. + radius: The maximum half-width of the averaging window (the + window spans at most ``2*radius + 1`` elements). + + Returns: + A list of smoothed values of the same length as ``vals``. + """ vals2 = [] vlen = len(vals) @@ -231,12 +524,27 @@ def smooth(vals, radius): def iter_window_index(x, xdist, esp=None): - """ - iterates a sliding window over x with radius xradius - - returns an iterator over list of indices in x that represent windows - - x must be sorted least to greatest + """Iterate sliding-window index ranges over a sorted value sequence. + + Advances a window of fixed width ``xdist`` along the value axis of + a sorted sequence ``x``, yielding the array-index bounds and value + bounds of the window each time a point enters or exits it. + + The window boundaries are updated one step at a time: the lower + bound advances whenever the leading point would be expelled, and the + upper bound advances to admit the next point. + + Args: + x: A sorted (ascending) list of numeric values. + xdist: The width of the sliding window in the same units as + values in ``x``. + esp: Unused parameter retained for API compatibility. 
+ + Yields: + 4-tuples ``(lowi, highi, low, high)`` where ``lowi`` and + ``highi`` are the inclusive index bounds of the current window + in ``x``, and ``low`` / ``high`` are the corresponding value + boundaries. """ vlen = len(x) @@ -294,7 +602,25 @@ def iter_window_index(x, xdist, esp=None): def iter_window_index_step(x, size, step, minsize=0): - + """Iterate fixed-step sliding-window index ranges over a sorted value sequence. + + Advances a window of fixed width ``size`` in increments of ``step`` + along the value axis, yielding index and value bounds for each + window position that contains at least ``minsize`` points. + + Args: + x: A sorted (ascending) list of numeric values. + size: The width of each window in the same units as ``x``. + step: The distance to advance the window centre between successive + yields. + minsize: Minimum number of points that must be inside the window + for it to be yielded. Defaults to 0. + + Yields: + 4-tuples ``(lowi, highi, low, high)`` where ``lowi`` and + ``highi`` are the inclusive index bounds of the current window + in ``x``, and ``low`` / ``high`` are the value boundaries. + """ vlen = len(x) start = x[0] end = x[-1] @@ -328,32 +654,73 @@ def iter_window_index_step(x, size, step, minsize=0): def iter_window(x, xdist, func=lambda win: win, minsize=0): + """Apply a function to each sliding window over a sorted sequence. + + Wraps :func:`iter_window_index` and yields the window midpoint + together with ``func`` applied to the window slice. + + Note: + The internal call uses ``xsize`` rather than ``xdist``; this is + a latent bug in the original code and is preserved here. + + Args: + x: A sorted (ascending) list of numeric values. + xdist: The width of the sliding window. + func: A callable applied to each window slice ``x[lowi:highi]``. + Defaults to the identity function. + minsize: Minimum number of points in the window before it is + yielded. Defaults to 0. 
+ + Yields: + 2-tuples ``(midpoint, func(window))`` where ``midpoint`` is + ``(low + high) / 2`` and ``window`` is the slice of ``x`` + within the current bounds. """ - iterates a sliding window over x with radius xradius - - x must be sorted least to greatest - """ - for lowi, highi, low, high in iter_window_index(x, xsize): if highi - lowi >= minsize: yield (high + low)/2.0, func(x[lowi:highi]) def iter_window_step(x, width, step, func=lambda win: win, minsize=0): + """Apply a function to each fixed-step sliding window over a sorted sequence. + + Wraps :func:`iter_window_index_step` and yields the window midpoint + together with ``func`` applied to the window slice. ``x`` must be + sorted in ascending order. + + Args: + x: A sorted (ascending) list of numeric values. + width: The width of each window in the same units as ``x``. + step: The distance to advance the window between successive yields. + func: A callable applied to each window slice ``x[lowi:highi]``. + Defaults to the identity function. + minsize: Minimum number of points that must be in the window for + it to be yielded. Defaults to 0. + + Yields: + 2-tuples ``(midpoint, func(window))`` where ``midpoint`` is + ``(low + high) / 2.0`` and ``window`` is the slice of ``x`` + within the current bounds. """ - iterates a sliding window over x with width 'width' - - x must be sorted least to greatest - - return an iterator with (midx, func(x[lowi:highi])) - """ - for lowi, highi, low, high in iter_window_index_step(x, width, step, minsize): yield (high + low) / 2.0, func(x[lowi:highi]) def _sortTogether(x, y): - """Sort x and y together by x values.""" + """Sort two sequences together by the values of ``x``. + + Zips ``x`` and ``y`` into pairs, sorts by the first element of each + pair, then unzips back into two separate lists. + + Args: + x: A sequence of sortable values used as the sort key. + y: A sequence of values of the same length as ``x``. 
+ + Returns: + A 2-tuple ``(x2, y2)`` where both lists have been reordered so + that ``x2`` is sorted ascending. Returns ``([], [])`` if ``x`` + is empty. + """ if not x: return [], [] pairs = sorted(zip(x, y)) @@ -362,10 +729,28 @@ def _sortTogether(x, y): def smooth2(x, y, xradius, minsize=0, sort=False): - """ - return an averaging of x and y using xradius - - x must be sorted least to greatest + """Smooth paired (x, y) data by averaging within a sliding x-radius window. + + For each point ``x[i]``, the window spans all points whose x-value + lies within ``[x[i] - r, x[i] + r]`` where + ``r = min(x[i] - min(x), max(x) - x[i], xradius)`` so that the + effective radius shrinks near the data boundaries. + + Args: + x: A sorted (ascending) list of x-coordinates. Must be + non-empty and of the same length as ``y``. + y: A list of y-values corresponding to ``x``. + xradius: The maximum half-width of the averaging window in + the same units as ``x``. + minsize: Minimum number of points that must be in the window + for the averaged point to be included in the output. + Defaults to 0. + sort: If ``True``, sort ``x`` and ``y`` together by ``x`` before + smoothing. Defaults to ``False``. + + Returns: + A 2-tuple ``(x2, y2)`` of lists containing the smoothed x and y + values. Returns ``([], [])`` if ``x`` is empty. """ vlen = len(x) @@ -413,8 +798,23 @@ def smooth2(x, y, xradius, minsize=0, sort=False): def factorial(x, k=1): - """Simple implementation of factorial""" - + """Compute the partial factorial product x! / k!. + + Calculates the product of all integers from ``k+1`` to ``x`` + inclusive. When ``k=1`` (the default) this is the standard + factorial ``x!``. When ``k > 1`` it returns the falling factorial + ``x! / k!``. + + Args: + x: The upper bound of the product (inclusive). Converted to + ``int`` internally. + k: The lower bound; the product starts at ``k+1``. Defaults + to 1. + + Returns: + An integer equal to ``(k+1) * (k+2) * ... 
+        * x``, or 1 if the
+        range is empty (i.e. ``x <= k``).
+    """
     n = 1
     for i in range(int(k)+1, int(x)+1):
         n *= i
@@ -422,8 +822,22 @@
 
 
 def logfactorial(x, k=1):
-    """returns the log(factorial(x) / factorial(k)"""
+    """Compute log(x! / k!) in log space.
+
+    Returns the natural log of the partial factorial product
+    ``(k+1) * (k+2) * ... * x`` by summing ``log(i)`` terms. This
+    avoids forming the enormous integer product for large ``x``.
 
+    Args:
+        x: The upper bound of the product (inclusive). Converted to
+            ``int`` internally.
+        k: The lower bound; the product starts at ``k+1``. Defaults
+            to 1.
+
+    Returns:
+        A float equal to ``log(k+1) + log(k+2) + ... + log(x)``,
+        or 0.0 if the range is empty.
+    """
     n = 0
     for i in range(int(k)+1, int(x)+1):
         n += log(i)
@@ -431,6 +845,20 @@
 
 
 def choose(n, k):
+    """Compute the binomial coefficient C(n, k) = n! / (k! * (n-k)!).
+
+    Uses a multiplicative formula for efficiency, exploiting the
+    symmetry ``C(n, k) == C(n, n-k)`` to minimise the number of
+    multiplications. Returns the result rounded to the nearest integer.
+
+    Args:
+        n: The total number of items.
+        k: The number of items to choose.
+
+    Returns:
+        An integer equal to C(n, k). Returns 1.0 when both ``n`` and
+        ``k`` are 0, and 0 when any argument is negative or ``k > n``.
+    """
     if n == 0 and k == 0:
         return 1.0
 
@@ -449,19 +877,44 @@
 
 
 def _oneNorm(weights):
-    """Normalize a list of weights to sum to 1."""
+    """Normalise a list of weights so they sum to 1.
+
+    Divides each weight by the total sum of all weights.
+
+    Args:
+        weights: A list of non-negative numeric values whose sum is
+            positive.
+
+    Returns:
+        A new list of floats of the same length as ``weights`` that
+        sum to 1.0.
+    """
    s = sum(weights)
    return [w / s for w in weights]


def sample(weights):
-    """
-    Randomly choose an int between 0 and len(probs)-1 using
-    the weights stored in list probs.
+    """Randomly choose an index proportional to the given weights.
- item i will be chosen with probability weights[i]/sum(weights) - """ + Normalises ``weights`` to a proper probability distribution and then + samples using a CDF built from the normalised weights and a binary + search via :func:`algorithms.binsearch`. + + Item ``i`` is chosen with probability ``weights[i] / sum(weights)``. + Args: + weights: A list of non-negative numeric values. The length + determines the range of possible return values (0 to + ``len(weights) - 1``). + + Returns: + An integer index into ``weights``, selected with probability + proportional to each weight. + + Raises: + AssertionError: If ``algorithms.binsearch`` returns ``None`` for + the lower bound, indicating an unexpected state. + """ probs = _oneNorm(weights) cdf = [0] @@ -478,12 +931,31 @@ def sample(weights): def chyper(m, n, M, N, report=0): - ''' - calculates cumulative probability based on - hypergeometric distribution - over/under/both (report = 0/1/2) - (uses /seq/compbio02/software-Linux/misc/chyper) - ''' + """Compute a hypergeometric cumulative probability via an external ``chyper`` binary. + + Models drawing ``n`` balls from an urn containing ``N`` balls of which + ``M`` are white (successes). ``m`` is the number of white balls drawn. + Calls the external command-line tool ``chyper`` and parses its output. + + Args: + m: Number of white balls drawn (observed successes). Must be + an ``int`` with ``m <= n`` and ``m <= M``. + n: Total balls drawn. Must be an ``int`` with ``n <= N``. + M: Total white balls in urn. Must be an ``int``. + N: Total balls in urn. Must be an ``int``. + report: Controls which tail(s) are returned. + ``0`` — p-value for over-representation (default). + ``1`` — p-value for under-representation. + ``2`` — 2-tuple ``(over_p, under_p)``. + + Returns: + A float p-value, or a list of two floats when ``report=2``. + + Raises: + AssertionError: If arguments do not satisfy type or range constraints. + Exception: If the ``chyper`` command produces no output. 
+ Exception: If ``report`` is not 0, 1, or 2. + """ assert( (type(m) == type(n) == type(M) == type(N) == int) and m <= n and m <= M and n <= N) @@ -511,18 +983,31 @@ def chyper(m, n, M, N, report=0): def rhyper(m, n, M, N, report=0): - ''' - calculates cumulative probability based on - hypergeometric distribution - over/under/both (report = 0/1/2) - (uses R through RPy2) - - N = total balls in urn - M = total white balls in urn - n = drawn balls from urn - m = drawn white balls from urn - - ''' + """Compute a hypergeometric cumulative probability via R (rpy2). + + Models drawing ``n`` balls from an urn containing ``N`` balls of which + ``M`` are white (successes). ``m`` is the number of white balls drawn. + Uses R's ``phyper`` function via the rpy2 interface. + + Args: + m: Number of white balls drawn (observed successes). Must be + an ``int`` with ``m <= n`` and ``m <= M``. + n: Total balls drawn. Must be an ``int`` with ``n <= N``. + M: Total white balls in urn. Must be an ``int``. + N: Total balls in urn. Must be an ``int``. + report: Controls which tail(s) are returned. + ``0`` — p-value for over-representation, i.e. + ``P(X >= m)`` (default). + ``1`` — p-value for under-representation, i.e. ``P(X <= m)``. + ``2`` — 2-tuple ``(over_p, under_p)``. + + Returns: + A float p-value, or a 2-tuple of floats when ``report=2``. + + Raises: + AssertionError: If arguments do not satisfy type or range constraints. + Exception: If ``report`` is not 0, 1, or 2. + """ import rpy2.robjects as r_module r = r_module.r @@ -543,8 +1028,19 @@ def rhyper(m, n, M, N, report=0): raise Exception("unknown option") def cdf(vals): - """Computes the CDF of a list of values""" + """Compute the empirical cumulative distribution function (ECDF) of a list. + + Sorts ``vals`` and assigns each unique value a cumulative probability + equal to its 0-based rank divided by the total number of values. + Args: + vals: A sequence of numeric values. 
+ + Returns: + A 2-tuple ``(x, y)`` where ``x`` is the sorted list of values and + ``y`` is the corresponding list of cumulative probabilities in + [0, 1). + """ vals = sorted(vals) tot = float(len(vals)) x = [] @@ -558,8 +1054,31 @@ def cdf(vals): def enrichItems(in_items, out_items, M=None, N=None, useq=True, extra=False): - """Calculates enrichment for items within an in-set vs and out-set. - Returns a sorted DataFrame. + """Calculate item enrichment between an in-set and an out-set. + + Counts how often each item appears in ``in_items`` vs ``out_items`` and + tests for enrichment using the hypergeometric distribution via + :func:`rhyper`. Optionally adjusts p-values to q-values (FDR) and + adds fold-enrichment columns. + + Args: + in_items: An iterable of items in the foreground (in-set). + out_items: An iterable of items in the background (out-set). + M: The foreground population size. Defaults to + ``len(in_items)``. + N: The total population size. Defaults to + ``len(in_items) + len(out_items)``. + useq: If ``True`` (default), add ``qval`` and ``qval_under`` + columns computed via FDR correction using :func:`qvalues`. + extra: If ``True``, add columns ``in_size``, ``out_size``, + ``item_ratio``, ``size_ratio``, and ``fold`` for fold- + enrichment analysis. Defaults to ``False``. + + Returns: + A :class:`pandas.DataFrame` sorted by ``pval`` (ascending) with + columns ``item``, ``in_count``, ``out_count``, ``pval``, + ``pval_under``, and optionally ``qval``, ``qval_under``, and + fold-enrichment columns. """ # count items using defaultdict instead of rasmus util.Dict @@ -607,11 +1126,34 @@ def enrichItems(in_items, out_items, M=None, N=None, useq=True, extra=False): def qvalues(pvals): + """Compute Benjamini-Hochberg FDR-adjusted p-values (q-values) via R. + + Calls R's ``p.adjust`` function with ``method='fdr'`` through rpy2. + + Args: + pvals: A list of raw p-values (floats in [0, 1]). 
+ + Returns: + A list of FDR-adjusted p-values (q-values) of the same length + as ``pvals``. + """ import rpy2.robjects as robjects ret = robjects.r['p.adjust'](robjects.FloatVector(pvals), 'fdr') return list(ret) def qvalues2(pvals): + """Compute q-values using the Storey-Tibshirani method via R's qvalue package. + + Loads the ``qvalue`` R package through rpy2 and calls ``qvalue()`` on + the provided p-values. + + Args: + pvals: A list of raw p-values (floats in [0, 1]). + + Returns: + A list of q-values of the same length as ``pvals`` as computed + by the Storey-Tibshirani estimator. + """ import rpy2.robjects as robjects robjects.r['library']('qvalue') ret = robjects.r['qvalue'](robjects.FloatVector(pvals)) @@ -623,6 +1165,18 @@ def qvalues2(pvals): # def uniformPdf(x, params): + """Evaluate the Uniform(a, b) probability density function at ``x``. + + Returns ``1 / (b - a)`` when ``a <= x <= b``, and 0 otherwise. + + Args: + x: The point at which to evaluate the PDF. + params: A 2-tuple ``(a, b)`` defining the lower and upper bounds + of the uniform distribution. + + Returns: + The PDF value at ``x`` as a float. + """ a, b = params if x < a or x > b: return 0.0 @@ -631,37 +1185,137 @@ def uniformPdf(x, params): def binomialPdf(k, params): + """Evaluate the Binomial(n, p) probability mass function at ``k``. + + Computes:: + + P(X = k) = C(n, k) * p^k * (1 - p)^(n - k) + + Args: + k: The number of successes (non-negative integer). + params: A 2-tuple ``(p, n)`` where ``p`` is the success probability + per trial and ``n`` is the total number of trials. + + Returns: + The probability of exactly ``k`` successes as a float. + """ p, n = params return choose(n, k) * (p ** k) * ((1.0-p) ** (n - k)) def gaussianPdf(x, params): + """Evaluate the standard Normal N(0, 1) probability density function at ``x``. 
+ + Computes:: + + f(x) = (1 / sqrt(2*pi)) * exp(-x^2 / 2) + + Note: + The ``params`` argument is accepted but ignored; this function + always evaluates the standard normal (mean 0, variance 1). + + Args: + x: The point at which to evaluate the PDF. + params: Unused. Accepted for API consistency with other PDF + functions. + + Returns: + The standard normal PDF value at ``x`` as a float. + """ return 1/sqrt(2*pi) * exp(- x**2 / 2.0) def normalPdf(x, params): + """Evaluate the Normal(mu, sigma) probability density function at ``x``. + + Computes:: + + f(x) = (1 / (sigma * sqrt(2*pi))) * exp(-(x - mu)^2 / (2*sigma^2)) + + Args: + x: The point at which to evaluate the PDF. + params: A 2-tuple ``(mu, sigma)`` — the mean and standard + deviation of the normal distribution. + + Returns: + The normal PDF value at ``x`` as a float. + """ mu, sigma = params return 1.0/(sigma * sqrt(2.0*pi)) * exp(- (x - mu)**2 / (2.0 * sigma**2)) def normalCdf(x, params): + """Evaluate the Normal(mu, sigma) cumulative distribution function at ``x``. + + Computes:: + + F(x) = (1 + erf((x - mu) / (sigma * sqrt(2)))) / 2 + + Args: + x: The point at which to evaluate the CDF. + params: A 2-tuple ``(mu, sigma)`` — the mean and standard + deviation of the normal distribution. + + Returns: + The cumulative probability P(X <= x) as a float in [0, 1]. + """ mu, sigma = params return (1 + erf((x - mu)/(sigma * sqrt(2)))) / 2.0 def logNormalPdf(x, params): - """mu and sigma are the mean and standard deviation of the - variable's logarithm""" + """Evaluate the log-normal probability density function at ``x``. + + The log-normal distribution describes a variable whose natural + logarithm is normally distributed. The PDF is:: + + f(x) = (1 / (x * sigma * sqrt(2*pi))) * exp(-(log(x) - mu)^2 / (2*sigma^2)) + Args: + x: The point at which to evaluate the PDF. Must be positive. + params: A 2-tuple ``(mu, sigma)`` — the mean and standard + deviation of the variable's natural logarithm. 
+ + Returns: + The log-normal PDF value at ``x`` as a float. Returns nonsensical + values for ``x <= 0``. + """ mu, sigma = params return 1/(x * sigma * sqrt(2*pi)) * \ exp(- (log(x) - mu)**2 / (2.0 * sigma**2)) def logNormalCdf(x, params): - """mu and sigma are the mean and standard deviation of the - variable's logarithm""" + """Evaluate the log-normal cumulative distribution function at ``x``. + Computes:: + + F(x) = (1 + erf((log(x) - mu) / (sigma * sqrt(2)))) / 2 + + Args: + x: The point at which to evaluate the CDF. Must be positive. + params: A 2-tuple ``(mu, sigma)`` — the mean and standard + deviation of the variable's natural logarithm. + + Returns: + The cumulative probability P(X <= x) as a float in [0, 1]. + """ mu, sigma = params return (1 + erf((log(x) - mu)/(sigma * sqrt(2)))) / 2.0 def poissonPdf(x, params): + """Evaluate the Poisson probability mass function at ``x``. + + Computes the probability in log space to avoid overflow:: + + P(X = x) = exp(-lambda) * lambda^x / x! + = exp(-lambda + sum(log(lambda/i) for i in 1..x)) + + Args: + x: The number of events (non-negative integer). + params: A 1-tuple or list whose first element is ``lambda`` + (the expected number of events, must be positive). + + Returns: + The Poisson PMF value P(X = x) as a float. Returns 0.0 if + ``x < 0`` or ``lambda <= 0``. + """ lambd = params[0] if x < 0 or lambd <= 0: @@ -674,7 +1328,26 @@ def poissonPdf(x, params): def poissonCdf(x, params): - """Cumulative distribution function of the Poisson distribution""" + """Evaluate the Poisson cumulative distribution function at ``x``. + + Computes P(X <= x) using the regularised incomplete gamma function:: + + F(x; lambda) = (Gamma(floor(x+1)) - gammainc(floor(x+1), lambda)) + / floor(x)! + + Note: + Not implemented accurately for large ``x`` or ``lambda``. + + Args: + x: The upper bound (non-negative number; floor is taken + internally). 
+ params: A 1-tuple or list whose first element is ``lambda`` + (the expected number of events). + + Returns: + The cumulative probability P(X <= x) as a float, or 0 if + ``x < 0``. + """ # NOTE: not implemented accurately for large x or lambd lambd = params[0] @@ -686,7 +1359,19 @@ def poissonCdf(x, params): def poissonvariate(lambd): - """Sample from a Poisson distribution""" + """Draw a random sample from a Poisson distribution. + + Uses Knuth's algorithm: generate uniform random variables and + multiply them together until their product falls below + ``exp(-lambda)``. The count of multiplications minus one is the + Poisson variate. + + Args: + lambd: The expected number of events per interval (lambda > 0). + + Returns: + A non-negative integer drawn from Poisson(lambda). + """ l = exp(-lambd) k = 0 p = 1.0 @@ -698,6 +1383,21 @@ def poissonvariate(lambd): return k - 1 def exponentialPdf(x, params): + """Evaluate the Exponential(lambda) probability density function at ``x``. + + Computes:: + + f(x; lambda) = lambda * exp(-lambda * x) for x >= 0, lambda >= 0 + + Args: + x: The point at which to evaluate the PDF. + params: A 1-tuple or list whose first element is ``lambda`` + (the rate parameter). + + Returns: + The exponential PDF value at ``x`` as a float. Returns 0.0 if + ``x < 0`` or ``lambda < 0``. + """ lambd = params[0] if x < 0 or lambd < 0: @@ -707,6 +1407,21 @@ def exponentialPdf(x, params): def exponentialCdf(x, params): + """Evaluate the Exponential(lambda) cumulative distribution function at ``x``. + + Computes:: + + F(x; lambda) = 1 - exp(-lambda * x) for x >= 0, lambda >= 0 + + Args: + x: The point at which to evaluate the CDF. + params: A 1-tuple or list whose first element is ``lambda`` + (the rate parameter). + + Returns: + The cumulative probability P(X <= x) as a float. Returns 0.0 if + ``x < 0`` or ``lambda < 0``. 
+ """ lambd = params[0] if x < 0 or lambd < 0: @@ -716,9 +1431,36 @@ def exponentialCdf(x, params): def exponentialvariate(lambd): + """Draw a random sample from an Exponential(lambda) distribution. + + Uses the inverse CDF (quantile) method: if U ~ Uniform(0,1) then + ``-log(U) / lambda`` is Exponentially distributed with rate ``lambda``. + + Args: + lambd: The rate parameter (lambda > 0). + + Returns: + A non-negative float drawn from Exponential(lambda). + """ return -log(random.random()) / lambd def gammaPdf(x, params): + """Evaluate the Gamma(alpha, beta) probability density function at ``x``. + + Uses the rate (inverse-scale) parameterisation:: + + f(x; alpha, beta) = beta^alpha * x^(alpha-1) * exp(-beta*x) + / Gamma(alpha) + + Args: + x: The point at which to evaluate the PDF. Must be positive. + params: A 2-tuple ``(alpha, beta)`` — the shape and rate + parameters. Both must be positive. + + Returns: + The gamma PDF value at ``x`` as a float. Returns 0.0 if any of + ``x``, ``alpha``, or ``beta`` is non-positive. + """ alpha, beta = params if x <= 0 or alpha <= 0 or beta <= 0: return 0.0 @@ -727,6 +1469,22 @@ def gammaPdf(x, params): gamma(alpha) def gammaPdf2(x, params): + """Evaluate the Gamma(alpha, beta) PDF at ``x`` using log-space arithmetic. + + Numerically more stable than :func:`gammaPdf` for large parameter + values. Computes the same distribution in log space:: + + log f = -x*beta + (alpha-1)*log(x) + alpha*log(beta) - gammaln(alpha) + + Args: + x: The point at which to evaluate the PDF. Must be positive. + params: A 2-tuple ``(alpha, beta)`` — the shape and rate + parameters (rate parameterisation). Both must be positive. + + Returns: + The gamma PDF value at ``x`` as a float. Returns 0.0 if any of + ``x``, ``alpha``, or ``beta`` is non-positive. 
+ """ alpha, beta = params if x <= 0 or alpha <= 0 or beta <= 0: return 0.0 @@ -736,6 +1494,21 @@ def gammaPdf2(x, params): def gammaCdf(x, params): + """Evaluate the Gamma(alpha, beta) cumulative distribution function at ``x``. + + Computes P(X <= x) using the lower incomplete gamma function:: + + F(x; alpha, beta) = gammainc(alpha, x*beta) / Gamma(alpha) + + Args: + x: The point at which to evaluate the CDF. + params: A 2-tuple ``(alpha, beta)`` — the shape and rate + parameters (rate parameterisation). Both must be positive. + + Returns: + The cumulative probability P(X <= x) as a float. Returns 0 if + ``x <= 0``. + """ alpha, beta = params if x <= 0: return 0 @@ -744,10 +1517,27 @@ def gammaCdf(x, params): def betaPdf2(x, params): - """A simpler implementation of beta distribution but will overflow - for values of alpha and beta near 100 - """ + """Evaluate the Beta(alpha, beta) PDF at ``x`` using direct gamma computation. + Simpler but less numerically stable than :func:`betaPdf`; will + overflow for ``alpha`` or ``beta`` values near 100 because it + evaluates ``Gamma(alpha + beta)`` directly. + + Formula:: + + f(x; alpha, beta) = Gamma(alpha+beta) / (Gamma(alpha)*Gamma(beta)) + * x^(alpha-1) * (1-x)^(beta-1) + + Args: + x: The point at which to evaluate the PDF. Must satisfy + ``0 < x < 1``. + params: A 2-tuple ``(alpha, beta)`` — the shape parameters, both + must be positive. + + Returns: + The beta PDF value at ``x`` as a float. Returns 0.0 if ``x`` + is outside (0, 1) or if either shape parameter is non-positive. + """ alpha, beta = params if 0 < x < 1 and alpha > 0 and beta > 0: return gamma(alpha + beta) / (gamma(alpha)*gamma(beta)) * \ @@ -756,6 +1546,24 @@ def betaPdf2(x, params): return 0.0 def betaPdf(x, params): + """Evaluate the Beta(alpha, beta) PDF at ``x`` using log-gamma arithmetic. 
+ + Numerically stable implementation that avoids overflow by computing + the PDF in log space:: + + log f = gammaln(alpha+beta) - gammaln(alpha) - gammaln(beta) + + (alpha-1)*log(x) + (beta-1)*log(1-x) + + Args: + x: The point at which to evaluate the PDF. Must satisfy + ``0 < x < 1``. + params: A 2-tuple ``(alpha, beta)`` — the shape parameters, both + must be positive. + + Returns: + The beta PDF value at ``x`` as a float. Returns 0.0 if ``x`` + is outside (0, 1) or if either shape parameter is non-positive. + """ alpha, beta = params if 0 < x < 1 and alpha > 0 and beta > 0: @@ -767,6 +1575,25 @@ def betaPdf(x, params): def betaPdf3(x, params): + """Evaluate the Beta(alpha, beta) PDF at ``x`` using a product formula. + + Computes the PDF via a direct multiplicative recurrence with + integer-cast parameters. Splits the product into two parts: a + symmetric core term up to ``min(alpha-1, beta-1)``, then an + asymmetric tail term up to ``max(alpha-1, beta-1)``. + + Args: + x: The point at which to evaluate the PDF. Must satisfy + ``0 < x < 1``. + params: A 2-tuple ``(alpha, beta)`` — the shape parameters. + Values are cast to ``int`` internally, so non-integer inputs + are truncated. Both must be positive. + + Returns: + The beta PDF value at ``x`` as a float. Returns 0.0 if ``x`` + is outside (0, 1) or if either shape parameter is non-positive + after truncation. + """ alpha, beta = map(int, params) if 0 < x < 1 and alpha > 0 and beta > 0: n = min(alpha-1, beta-1) @@ -790,10 +1617,23 @@ def betaPdf3(x, params): def gamma(x): - """ - Lanczos approximation to the gamma function. + """Compute the gamma function Gamma(x) via the Lanczos approximation. + + Uses the Lanczos coefficients to approximate Gamma(x) for positive + real ``x``. The formula is:: + + Gamma(x) ≈ sqrt(2*pi) / x * (x + 5.5)^(x + 0.5) * exp(-x - 5.5) + * series(x) - found on http://www.rskey.org/gamma.htm + where ``series(x)`` is the Lanczos sum with 7 coefficients. 
+
+    Reference: http://www.rskey.org/gamma.htm
+
+    Args:
+        x: A positive real number.
+
+    Returns:
+        An approximation of Gamma(x) as a float.
     """
 
     ret = 1.000000000190015 + \
@@ -809,26 +1649,21 @@
 
 
 def gammaln(xx):
-    """
-    From numerical alogrithms in C
-
-    float gammln(float xx)
-    Returns the value ln[(xx)] for xx > 0.
-    {
-    Internal arithmetic will be done in double precision, a nicety that you can omit if five-figure
-    accuracy is good enough.
-    double x,y,tmp,ser;
-    static double cof[6]={76.18009172947146,-86.50532032941677,
-    24.01409824083091,-1.231739572450155,
-    0.1208650973866179e-2,-0.5395239384953e-5};
-    int j;
-    y=x=xx;
-    tmp=x+5.5;
-    tmp -= (x+0.5)*log(tmp);
-    ser=1.000000000190015;
-    for (j=0;j<=5;j++) ser += cof[j]/++y;
-    return -tmp+log(2.5066282746310005*ser/x);
-    }
+    """Compute the natural logarithm of the gamma function, ln(Gamma(xx)).
+
+    Implements the Lanczos approximation from *Numerical Recipes in C*
+    (Press et al.). Returns ``ln(Gamma(xx))`` for ``xx > 0``::
+
+        y = x = xx
+        tmp = x + 5.5 - (x + 0.5) * log(x + 5.5)
+        ser = 1.000000000190015 + sum(cof[j] / (y + j + 1) for j in 0..5)
+        return -tmp + log(2.5066282746310005 * ser / x)
+
+    Args:
+        xx: A positive real number.
+
+    Returns:
+        The natural logarithm of Gamma(xx) as a float.
     """
 
     cof = [76.18009172947146,-86.50532032941677,
@@ -851,7 +1686,23 @@
 
 GAMMA_INCOMP_ACCURACY = 1000
 
 def gammainc(a, x):
-    """Lower incomplete gamma function"""
+    """Compute the lower incomplete gamma function gamma(a, x).
+
+    Uses a series expansion truncated at ``GAMMA_INCOMP_ACCURACY`` terms
+    or when the current term drops below 0.0001::
+
+        gamma(a, x) = x^a * exp(-x) * sum_{n=0}^{inf} x^n / prod_{i=0}^{n}(a+i)
+
+    Reference: http://www.rskey.org/gamma.htm
+
+    Args:
+        a: The shape parameter (positive real number).
+        x: The upper integration limit (non-negative real number).
+
+    Returns:
+        An approximation of the lower incomplete gamma function
+        ``gamma(a, x)`` as a float.
+ """ # found on http://www.rskey.org/gamma.htm ret = 0 @@ -865,6 +1716,22 @@ def gammainc(a, x): def erf(x): + """Compute an approximation of the error function erf(x). + + Uses the rational approximation from the paper at + http://www.theorie.physik.uni-muenchen.de/~serge/erf-approx.pdf :: + + a = (8 / (3*pi)) * (pi - 3) / (4 - pi) + erf(x) ≈ sign(x) * sqrt(1 - exp(-x^2 * (4/pi + a*x^2) / (1 + a*x^2))) + + The approximation is accurate to approximately four decimal places. + + Args: + x: A real number. + + Returns: + An approximation of erf(x) in (-1, 1) as a float. + """ # http://www.theorie.physik.uni-muenchen.de/~serge/erf-approx.pdf a = 8/(3*pi) * (pi - 3)/(4 - pi) @@ -878,6 +1745,33 @@ def erf(x): def chiSquare(rows, expected=None, nparams=0): + """Compute the chi-square statistic and approximate p-value for a contingency table. + + Given a 2-D table of observed counts ``rows``, computes expected + counts under independence (or uses the provided ``expected`` table), + then calculates:: + + chi^2 = sum((obs - exp)^2 / exp) + + The degrees of freedom are + ``(nrows - 1) * (ncols - 1) - nparams``, clamped to at least 1. + The p-value is looked up in a hardcoded table via + :func:`chi_square_lookup`. + + Args: + rows: A list of lists of observed counts. All rows must have + the same length. + expected: A list of lists of expected counts with the same shape + as ``rows``. If ``None`` (default), expected counts are + computed from marginal totals via :func:`make_expected`. + nparams: The number of estimated parameters to subtract from + the degrees of freedom. Defaults to 0. + + Returns: + A 2-tuple ``(chisq, p)`` where ``chisq`` is the chi-square + statistic (float) and ``p`` is the approximate p-value (float). + Returns ``(0, 1.0)`` if any row or column marginal sum is zero. 
+ """ # ex: rows = [[1,2,3],[1,4,5]] assert(len(set(map(len, rows))) <= 1) @@ -901,6 +1795,20 @@ def chiSquare(rows, expected=None, nparams=0): def make_expected(rows): + """Compute expected counts for a contingency table under independence. + + For each cell ``(i, j)``, the expected count is:: + + expected[i][j] = row_total[i] * col_total[j] / grand_total + + Args: + rows: A list of lists of observed counts. All rows must have + the same length. + + Returns: + A list of lists of expected counts with the same shape as + ``rows``. + """ rowtotals = map(sum, rows) coltotals = map(sum, zip(* rows)) grandtotal = float(sum(rowtotals)) @@ -916,6 +1824,34 @@ def make_expected(rows): def chiSquareFit(xbins, ybins, func, nsamples, nparams, minsamples=5): + """Test a fitted distribution against binned data using a chi-square goodness-of-fit test. + + Converts normalised bin heights ``ybins`` to raw counts, computes + expected counts from ``func`` integrated over each bin, discards + bins with fewer than ``minsamples`` expected observations, and then + calls :func:`chiSquare`. + + Args: + xbins: A list of ``n+1`` bin-edge x-values (the left edges of + the first ``n`` bins). + ybins: A list of ``n`` normalised bin heights (density values, + not raw counts). + func: A callable ``func(x)`` representing the fitted PDF; + evaluated at each bin edge to compute expected bin mass. + nsamples: The total number of data samples used to convert + normalised heights to counts. + nparams: The number of fitted parameters to subtract from the + chi-square degrees of freedom. + minsamples: Minimum expected count required for a bin to be + included. Defaults to 5. + + Returns: + A 3-tuple ``(result, counts, expected)`` where ``result`` is the + ``(chisq, p)`` pair from :func:`chiSquare`, ``counts`` is the + list of observed counts for included bins, and ``expected`` is + the list of expected counts for included bins. If no bins pass + the ``minsamples`` threshold, returns ``([0, 1], [], [])``. 
+ """ sizes = [xbins[i+1] - xbins[i] for i in range(len(xbins)-1)] sizes.append(sizes[-1]) @@ -973,7 +1909,23 @@ def chiSquareFit(xbins, ybins, func, nsamples, nparams, minsamples=5): def chi_square_lookup(value, df): - + """Look up an approximate p-value for a chi-square statistic from a hardcoded table. + + Compares ``value`` against the ``chi_square_table`` for the given + degrees of freedom ``df`` (capped at 30) and returns the largest + significance level whose critical value does not exceed ``value``. + + Args: + value: The observed chi-square statistic. + df: Degrees of freedom. Values above 30 are treated as 30; + values of 0 or less return 1.0. + + Returns: + An approximate p-value from the set + ``{0.20, 0.10, 0.05, 0.025, 0.01, 0.001}`` as a float. + Returns 1.0 if ``value`` is smaller than all critical values in + the table row. + """ ps = [0.20, 0.10, 0.05, 0.025, 0.01, 0.001] if df <= 0: @@ -991,6 +1943,22 @@ def chi_square_lookup(value, df): def ttest(lst1, lst2): + """Compute the Welch's t-statistic for two independent samples. + + Calculates the two-sample t-statistic using the Welch (unequal + variance) formula:: + + t = |mean(lst1) - mean(lst2)| / sqrt(var(lst1)/n1 + var(lst2)/n2) + + Note: + The function computes ``t`` and ``df`` but does not return + anything; the implementation body is incomplete and has no + ``return`` statement. + + Args: + lst1: The first sample as a list of numeric values. + lst2: The second sample as a list of numeric values. + """ sdevdist = sqrt(var(lst1)/len(lst1) + var(lst2)/len(lst2)) t = abs(mean(lst1) - mean(lst2)) / sdevdist df = len(lst2) + len(lst2) - 2 @@ -1049,8 +2017,29 @@ def ttest(lst1, lst2): def spearman(vec1, vec2): - """Spearman's rank test""" + """Compute a Spearman rank-order correlation-like statistic. 
+ + Computes a Z-score based on the sum of squared differences between + the original values (not their ranks, despite the name):: + + R = sum((vec1[i] - vec2[i])^2 for i in range(n)) + Z = (6*R - n*(n^2 - 1)) / (n*(n+1)*sqrt(n-1)) + Note: + Despite the name, this implementation does not actually rank the + values before computing differences; it uses the raw values. + This differs from the standard Spearman rank correlation formula. + + Args: + vec1: A list of numeric values. + vec2: A list of numeric values of the same length as ``vec1``. + + Returns: + A Z-score float derived from the sum of squared raw differences. + + Raises: + AssertionError: If ``vec1`` and ``vec2`` have different lengths. + """ assert len(vec1) == len(vec2), "vec1 and vec2 are not the same length" n = len(vec1) @@ -1065,11 +2054,26 @@ def spearman(vec1, vec2): -# input: -# xdata, ydata - data to fit -# func - a function of the form f(x, params) -# def fitCurve(xdata, ydata, func, paramsInit): + """Fit a parametric function to data using least-squares optimisation. + + Uses :func:`scipy.optimize.leastsq` to minimise the sum of squared + residuals between ``ydata`` and ``func(x, params)`` evaluated at + each ``x`` in ``xdata``. + + Args: + xdata: A list of x-values. + ydata: A list of observed y-values of the same length as + ``xdata``. + func: A callable ``func(x, params)`` that returns a scalar given + a single x-value and a parameter array. + paramsInit: Initial parameter guess as a list or array. + + Returns: + A 2-tuple ``(params, resid_sum)`` where ``params`` is a list of + fitted parameter values and ``resid_sum`` is the sum of squared + residuals at the solution. + """ import scipy.optimize y = np.array(ydata) @@ -1087,6 +2091,27 @@ def error(params): def fitDistrib(func, paramsInit, data, start, end, step, perc=1.0): + """Fit a parametric distribution to a data histogram. 
+ + Note: + This function is currently disabled because it depends on + ``rasmus.util.distrib`` and ``rasmus.util.histbins``, which are + not available. Calling it always raises ``NotImplementedError``. + + Args: + func: A callable ``func(x, params)`` representing the PDF to fit. + paramsInit: Initial parameter guess. + data: The raw data samples to bin. + start: The lower edge of the histogram range. + end: The upper edge of the histogram range. + step: The bin width. + perc: A normalisation factor applied to bin heights. + Defaults to 1.0. + + Raises: + NotImplementedError: Always, because the required dependency is + unavailable. + """ # NOTE: fitDistrib is disabled because it depends on rasmus util.distrib # and util.histbins which are not available. # xdata, ydata = util.distrib(data, low=start, width=step) @@ -1099,6 +2124,29 @@ def fitDistrib(func, paramsInit, data, start, end, step, perc=1.0): def plotfuncFit(func, paramsInit, xdata, ydata, start, end, step, plot=None, **options): + """Fit a parametric function to data and (formerly) plot the result. + + Calls :func:`fitCurve` to fit ``func`` to ``(xdata, ydata)`` and + returns the fitted parameters and residual sum. Plotting via gnuplot + has been removed; the ``plot`` argument and plotting-related + parameters are retained for API compatibility but have no effect. + + Args: + func: A callable ``func(x, params)`` representing the model. + paramsInit: Initial parameter guess. + xdata: A list of x-values. + ydata: A list of observed y-values. + start: Unused (formerly the start of the plot range). + end: Unused (formerly the end of the plot range). + step: Unused (formerly the plot step size). + plot: Unused. Defaults to ``None``. + **options: Unused keyword arguments retained for compatibility. + + Returns: + A 3-tuple ``(None, params, resid)`` where ``params`` is the list + of fitted parameters and ``resid`` is the sum of squared + residuals. 
+    """
     # NOTE: plotting via gnuplot removed; returns params and resid only
     params, resid = fitCurve(xdata, ydata, func, paramsInit)
     # plot.plot(util.histbins(xdata), ydata, **options)
@@ -1108,13 +2156,61 @@
 
 def plotdistribFit(func, paramsInit, data, start, end, step, plot=None,
                    **options):
+    """Fit a distribution to data and (formerly) plot the result.
+
+    Note:
+        This function is currently disabled because it depends on
+        ``rasmus.util.distrib``, which is not available. Calling it
+        always raises ``NotImplementedError``.
+
+    Args:
+        func: A callable ``func(x, params)`` representing the PDF.
+        paramsInit: Initial parameter guess.
+        data: The raw data samples.
+        start: The lower edge of the histogram range.
+        end: The upper edge of the histogram range.
+        step: The bin width.
+        plot: Unused plot object. Defaults to ``None``.
+        **options: Unused keyword arguments.
+
+    Raises:
+        NotImplementedError: Always, because the required dependency is
+            unavailable.
+    """
     # NOTE: disabled because it requires rasmus util.distrib
     raise NotImplementedError("plotdistribFit requires rasmus util.distrib which is not available")
 
 
 def solveCubic(a, b, c, real=True):
-    """solves x^3 + ax^2 + bx + c = 0 for x"""
+    """Solve the general monic cubic equation x^3 + ax^2 + bx + c = 0.
+
+    Applies the Cardano / Vieta substitution to reduce to a depressed
+    cubic and then computes all three cube roots using complex arithmetic.
+    Returns only real roots by default.
+
+    Algorithm:
+        1. Substitute ``x = t - a/3`` to eliminate the quadratic term,
+           yielding ``t^3 + pt + q = 0``.
+        2. Compute the square root of the discriminant
+           ``sqrt(q^2/4 + p^3/27)`` in complex arithmetic.
+        3. Find the three cube roots of ``q/2 + sqrt(...)`` using the
+           primitive cube root of unity.
+        4. Recover the three roots ``x_k = p/(3*u_k) - u_k - a/3``.
+
+    Args:
+        a: Coefficient of the x^2 term.
+        b: Coefficient of the x term.
+ c: The constant term. + real: If ``True`` (default), return only roots whose imaginary + part is smaller than 1e-10 in absolute value. If ``False``, + return all three complex roots. + + Returns: + A list of roots. With ``real=True`` the list contains 1 or 3 + real floats. With ``real=False`` the list always contains 3 + complex numbers. + """ p = b - a*a / 3.0 q = c + (2*a*a*a - 9*a*b) / 27.0 @@ -1155,7 +2251,18 @@ def solveCubic(a, b, c, real=True): def _solveCubic_test(n=100): + """Run a self-test of :func:`solveCubic` on random and fixed inputs. + + Generates ``n`` random cubics (plus three fixed edge cases) and + verifies that each root ``x`` satisfies ``|x^3 + a*x^2 + b*x + c| < 1e-4``. + Args: + n: Number of random test cubics to generate. Defaults to 100. + + Raises: + AssertionError: If any computed root does not satisfy the + polynomial equation within tolerance. + """ def test(a, b, c): xs = solveCubic(a, b, c) diff --git a/src/seqlib/util.py b/src/seqlib/util.py index 0d01e84..9d9d3db 100644 --- a/src/seqlib/util.py +++ b/src/seqlib/util.py @@ -33,52 +33,83 @@ # Python 3 compatibility: cmp() was removed def cmp(a, b): + """Three-way comparison function for Python 3 compatibility. + + Args: + a: First value. + b: Second value. + + Returns: + 1 if a > b, -1 if a < b, 0 if equal. + """ return (a > b) - (a < b) class Bundle (dict): - """ - A small class for creating a closure of variables - handy for nested functions that need to assign to variables in an - outer scope + """A small class for creating a closure of variables. - Example: + Handy for nested functions that need to assign to variables in an outer + scope. Attributes and dictionary keys are kept in sync. 
- def func1(): - this = Bundle(var1 = 0, var2 = "hello") - def func2(): - this.var1 += 1 - func2() - print(this.var1) - func1() - - will produce: - 1 - + Example:: + + def func1(): + this = Bundle(var1=0, var2="hello") + def func2(): + this.var1 += 1 + func2() + print(this.var1) + func1() + # prints: 1 """ def __init__(self, **variables): + """Initialize a Bundle with keyword arguments as attributes. + + Args: + **variables: Arbitrary keyword arguments that become both + attributes (self.key) and dictionary entries. + """ for key, val in variables.items(): setattr(self, key, val) dict.__setitem__(self, key, val) def __setitem__(self, key, val): + """Set a key both as an attribute and as a dict entry. + + Args: + key: Attribute/key name. + val: Value to assign. + """ setattr(self, key, val) dict.__setitem__(self, key, val) class Dict (dict): - """My personal nested Dictionary (with default values)""" + """A nested dictionary with configurable dimensionality and default values. + + Accessing a missing key returns (and optionally inserts) a default value + or a nested Dict of one lower dimension, enabling multi-dimensional sparse + containers without explicit initialisation. + """ def __init__(self, items=None, dim=1, default=None, insert=True): - """ - items -- items to initialize Dict (can be dict, list, iter) - dim -- number of dimensions of the dictionary - default -- default value of a dictionary item + """Initialize a Dict. + + Args: + items: Initial items to populate the dict (dict, list of pairs, + or other iterable). If an int is passed, it is treated as + the old-style positional dim argument for backwards + compatibility. + dim: Number of nesting dimensions (default 1). + default: Default value returned for missing leaf-level keys + (default None). + insert: If True, accessing a missing key inserts the default + value automatically (default True). 
""" if isinstance(items, int): @@ -97,6 +128,15 @@ def __init__(self, items=None, dim=1, default=None, insert=True): def __getitem__(self, i): + """Return the value for key i, inserting a default if missing. + + Args: + i: The key to look up. + + Returns: + The stored value, or a default Dict/copy of null if the key was + absent. + """ if i not in self: if self._dim > 1: ret = Dict(self._dim - 1, self._null) @@ -109,6 +149,14 @@ def __getitem__(self, i): def has_keys(self, *keys): + """Check whether a sequence of nested keys all exist. + + Args: + *keys: Keys to check at successive nesting levels. + + Returns: + True if all keys are present at the corresponding nesting levels. + """ if len(keys) == 0: return True elif len(keys) == 1: @@ -118,6 +166,11 @@ def has_keys(self, *keys): self[keys[0]].has_keys(*keys[1:]) def write(self, out = sys.stdout): + """Write a human-readable representation of the dict to a stream. + + Args: + out: Output stream to write to (default sys.stdout). + """ def walk(node, path): if node.dim == 1: for i in node: @@ -137,39 +190,77 @@ def walk(node, path): class Percent (float): + """A float subclass that formats itself as a percentage string. + + Attributes: + digits: Number of decimal places used when formatting (default 1). + """ digits = 1 def __str__(self): + """Return the value formatted as a percentage with self.digits decimals. + + Returns: + String such as "42.0" representing 42.0% (i.e. float value 0.42). + """ return (("%%.%df" % self.digits) % (float(self) * 100)) def __repr__(self): + """Return the same string as __str__.""" return str(self) class PushIter (object): - """Wrap an iterator in another iterator that allows one to push new - items onto the front of the iteration stream""" + """An iterator wrapper that allows pushing items back to the front of the stream. + + Wraps any iterable and provides a push() method to prepend items. + """ def __init__(self, it): + """Initialize a PushIter from any iterable. 
+
+        Args:
+            it: Any iterable to wrap.
+        """
         self._it = iter(it)
         self._queue = []
 
     def __iter__(self):
+        """Return self as the iterator."""
         return self
 
     def __next__(self):
+        """Return the next item, preferring items from the push queue.
+
+        Returns:
+            The next queued item if any; else delegates to ``self.next(_it)``
+            — buggy; should be ``next(self._it)`` (raises NameError as-is).
+        """
         if len(self._queue) > 0:
             return self._queue.pop()
         else:
             return self.next(_it)
 
     def push(self, item):
-        """Push a new item onto the front of the iteration stream"""
+        """Push a new item onto the front of the iteration stream.
+
+        Args:
+            item: Item to prepend to the iteration.
+        """
         self._queue.append(item)
 
 
 def exceptDefault(func, val, exc=Exception):
-    """Specify a default value for when an exception occurs"""
+    """Call func() and return val if the specified exception is raised.
+
+    Args:
+        func: A zero-argument callable to invoke.
+        val: Default value to return on exception.
+        exc: Exception type (or tuple of types) to catch (default Exception).
+
+    Returns:
+        The return value of func(), or val if exc was raised.
+    """
     try:
         return func()
     except exc:
@@ -463,6 +554,16 @@
 
 # simple matrix functions
 def make_matrix(nrows, ncols, val = 0):
+    """Create a 2D list (matrix) with given dimensions and a fill value.
+
+    Args:
+        nrows: Number of rows.
+        ncols: Number of columns.
+        val: Fill value for each cell (default 0); each cell gets a copy.
+
+    Returns:
+        A list of lists of shape (nrows, ncols) filled with copies of val.
+ """ mat = [] for i in range(nrows): row = [] @@ -585,12 +686,29 @@ def count(func, lst): n += 1 return n -def counteq(a, lst): return count(eqfunc(a), lst) -def countneq(a, lst): return count(neqfunc(a), lst) -def countle(a, lst): return count(lefunc(a), lst) -def countlt(a, lst): return count(ltfunc(a), lst) -def countge(a, lst): return count(gefunc(a), lst) -def countgt(a, lst): return count(gtfunc(a), lst) +def counteq(a, lst): + """Count items in lst equal to a.""" + return count(eqfunc(a), lst) + +def countneq(a, lst): + """Count items in lst not equal to a.""" + return count(neqfunc(a), lst) + +def countle(a, lst): + """Count items in lst less than or equal to a.""" + return count(lefunc(a), lst) + +def countlt(a, lst): + """Count items in lst strictly less than a.""" + return count(ltfunc(a), lst) + +def countge(a, lst): + """Count items in lst greater than or equal to a.""" + return count(gefunc(a), lst) + +def countgt(a, lst): + """Count items in lst strictly greater than a.""" + return count(gtfunc(a), lst) def find(func, *lsts): @@ -629,12 +747,29 @@ def find(func, *lsts): return pos -def findeq(a, lst): return find(eqfunc(a), lst) -def findneq(a, lst): return find(neqfunc(a), lst) -def findle(a, lst): return find(lefunc(a), lst) -def findlt(a, lst): return find(ltfunc(a), lst) -def findge(a, lst): return find(gefunc(a), lst) -def findgt(a, lst): return find(gtfunc(a), lst) +def findeq(a, lst): + """Return indices of items in lst equal to a.""" + return find(eqfunc(a), lst) + +def findneq(a, lst): + """Return indices of items in lst not equal to a.""" + return find(neqfunc(a), lst) + +def findle(a, lst): + """Return indices of items in lst less than or equal to a.""" + return find(lefunc(a), lst) + +def findlt(a, lst): + """Return indices of items in lst strictly less than a.""" + return find(ltfunc(a), lst) + +def findge(a, lst): + """Return indices of items in lst greater than or equal to a.""" + return find(gefunc(a), lst) + +def findgt(a, lst): + 
"""Return indices of items in lst strictly greater than a.""" + return find(gtfunc(a), lst) def islands(lst): @@ -748,13 +883,42 @@ def minfunc(func, lst): # count(ltfunc(4), lst) ==> returns the number of values in lst < 4 # -def eqfunc(a): return lambda x: x == a -def neqfunc(a): return lambda x: x != a -def ltfunc(a): return lambda x: x < a -def gtfunc(a): return lambda x: x > a -def lefunc(a): return lambda x: x <= a -def gefunc(a): return lambda x: x >= a +def eqfunc(a): + """Return a function that tests equality with a.""" + return lambda x: x == a + +def neqfunc(a): + """Return a function that tests inequality with a.""" + return lambda x: x != a + +def ltfunc(a): + """Return a function that tests x < a.""" + return lambda x: x < a + +def gtfunc(a): + """Return a function that tests x > a.""" + return lambda x: x > a + +def lefunc(a): + """Return a function that tests x <= a.""" + return lambda x: x <= a + +def gefunc(a): + """Return a function that tests x >= a.""" + return lambda x: x >= a + def withinfunc(a, b, ainc=True, binc=True): + """Return a function that tests whether x is within the range [a, b]. + + Args: + a: Lower bound. + b: Upper bound. + ainc: If True, the lower bound is inclusive (default True). + binc: If True, the upper bound is inclusive (default True). + + Returns: + A one-argument function returning True if x is in the specified range. 
+ """ if ainc: if binc: return lambda x: a <= x <= b @@ -775,25 +939,69 @@ def lg(num): """Retruns the log_2 of a number""" return math.log(num, 2) -def add(a, b): return a + b -def sub(a, b): return a - b -def mul(a, b): return a * b -def idiv(a, b): return a / b -def div(a, b): return a / float(b) +def add(a, b): + """Return a + b.""" + return a + b + +def sub(a, b): + """Return a - b.""" + return a - b + +def mul(a, b): + """Return a * b.""" + return a * b + +def idiv(a, b): + """Return a / b (true division).""" + return a / b + +def div(a, b): + """Return a / float(b).""" + return a / float(b) def safediv(a, b, default=INF): + """Divide a by b, returning default on ZeroDivisionError. + + Args: + a: Numerator. + b: Denominator. + default: Value to return when b is zero (default INF). + + Returns: + a / float(b), or default if b is zero. + """ try: return a / float(b) except ZeroDivisionError: return default def safelog(x, base=math.e, default=-INF): + """Compute log(x) in the given base, returning default on error. + + Args: + x: Value to take the logarithm of. + base: Logarithm base (default math.e for natural log). + default: Value to return when x <= 0 or overflow occurs (default -INF). + + Returns: + math.log(x, base), or default on OverflowError or ValueError. + """ try: return math.log(x, base) except (OverflowError, ValueError): return default -def invcmp(a, b): return cmp(b, a) # cmp is defined locally above +def invcmp(a, b): + """Return the reversed comparison of a and b (i.e. cmp(b, a)). + + Args: + a: First value. + b: Second value. + + Returns: + 1 if b > a, -1 if b < a, 0 if equal. + """ + return cmp(b, a) # cmp is defined locally above def clamp(x, low, high): """Clamps a value 'x' between the values 'low' and 'high' @@ -809,6 +1017,15 @@ def clamp(x, low, high): return x def clampfunc(low, high): + """Return a function that clamps its argument between low and high. + + Args: + low: Lower bound (or None for no lower bound). 
+ high: Upper bound (or None for no upper bound). + + Returns: + A one-argument function equivalent to clamp(x, low, high). + """ return lambda x: clamp(x, low, high) @@ -1106,6 +1323,16 @@ def write_delim(filename, data, delim="\t"): # def default_justify(val): + """Return the default column justification for a value. + + Numeric types (int, float) are right-justified; everything else is left. + + Args: + val: The value whose justification is needed. + + Returns: + "right" for int/float values, "left" otherwise. + """ if isinstance(val, int) or \ isinstance(val, float): return "right" @@ -1114,6 +1341,18 @@ def default_justify(val): defaultJustify = default_justify def default_format(val): + """Format a value for tabular display. + + Integers are formatted with comma separators via int2pretty. Percent + values use their own __str__. Small floats use scientific notation; + others use 4 decimal places. Everything else uses str(). + + Args: + val: The value to format. + + Returns: + A human-readable string representation of val. + """ if isinstance(val, int) and \ not isinstance(val, bool): return int2pretty(val) @@ -1196,7 +1435,18 @@ def printcols(data, width=None, spacing=1, format=defaultFormat, def list2matrix(lst, nrows=None, ncols=None, bycols=True): - """Turn a list into a matrix by wrapping its entries""" + """Reshape a flat list into a 2D matrix. + + Args: + lst: The list to reshape. + nrows: Number of rows. Inferred from ncols if not given. + ncols: Number of columns. Inferred from nrows if not given. + If neither is given, a roughly square shape is used. + bycols: If True, fill the matrix column-by-column (default True). + + Returns: + A list of lists representing the reshaped matrix. + """ mat = [] @@ -1222,7 +1472,15 @@ def list2matrix(lst, nrows=None, ncols=None, bycols=True): def printwrap(text, width=80, prefix="", out=sys.stdout): - """Prints text with wrapping""" + """Print text with line wrapping at a fixed column width. 
+ + Args: + text: The string to print. + width: Maximum number of characters per line (default 80). + If None, print the text as a single line with no wrapping. + prefix: String prepended to each wrapped line (default ""). + out: Output stream (default sys.stdout). + """ if width == None: out.write(text) out.write("\n") @@ -1276,7 +1534,21 @@ def print_dict(dic, key=lambda x: x, val=lambda x: x, spacing=4, out=sys.stdout, format=defaultFormat, justify=defaultJustify): - """Print s a dictionary in two columns""" + """Print a dictionary as an aligned two-column table. + + Args: + dic: Dictionary to print. + key: Function applied to keys before printing (default identity). + val: Function applied to values before printing (default identity). + num: Maximum number of entries to print. Defaults to all. + cmp: Comparison function (unused in Python 3; kept for compatibility). + order: Key function for sorting items. If None, default sort is used. + reverse: If True, sort in descending order (default False). + spacing: Number of spaces between columns (default 4). + out: Output stream (default sys.stdout). + format: Formatting function for cell values (default default_format). + justify: Justification function for cell values (default default_justify). + """ if num == None: num = len(dic) @@ -1299,13 +1571,32 @@ def print_dict(dic, key=lambda x: x, val=lambda x: x, # class SafeReadIter: + """An iterator over a file handle that stops at EOF without raising an error. + + Unlike a bare for-loop over a file, this class uses readline() and raises + StopIteration when an empty string (EOF) is encountered. + """ def __init__(self, infile): + """Initialize from an open file handle. + + Args: + infile: An open file handle to iterate over. + """ self.infile = infile def __iter__(self): + """Return self as the iterator.""" return self def __next__(self): + """Return the next line or raise StopIteration at EOF. + + Returns: + Next line string from the file. 
+ + Raises: + StopIteration: When end of file is reached. + """ line = self.infile.readline() if line == "": raise StopIteration @@ -1313,6 +1604,15 @@ def __next__(self): return line def readWord(infile, delims = [" ", "\t", "\n"]): + """Read the next whitespace-delimited word from a file stream. + + Args: + infile: An open file handle to read from. + delims: List of delimiter characters (default space, tab, newline). + + Returns: + The next word as a string, or an empty string at EOF. + """ word = "" while True: @@ -1331,6 +1631,16 @@ def readWord(infile, delims = [" ", "\t", "\n"]): def readUntil(stream, chars): + """Read from stream until one of the given characters (or EOF) is seen. + + Args: + stream: An open file handle. + chars: String or iterable of stop characters. + + Returns: + A tuple (token, char) where token is the accumulated string before + the stop character, and char is the stop character (or "" at EOF). + """ token = "" while True: char = stream.read(1) @@ -1340,6 +1650,17 @@ def readUntil(stream, chars): def readWhile(stream, chars): + """Read from stream while characters are in the given set. + + Args: + stream: An open file handle. + chars: String or iterable of accepted characters. + + Returns: + A tuple (token, char) where token is the accumulated string of + matching characters, and char is the first non-matching character + (or "" at EOF). + """ token = "" while True: char = stream.read(1) @@ -1349,6 +1670,14 @@ def readWhile(stream, chars): def skipComments(infile): + """Yield non-comment, non-blank lines from a file. + + Args: + infile: An iterable of lines (e.g. an open file handle). + + Yields: + Lines that do not start with "#" and are not blank. + """ for line in infile: if line.startswith("#") or line.startswith("\n"): continue @@ -1356,26 +1685,51 @@ def skipComments(infile): class IndentStream: - """ - Makes any stream into an indent stream. 
- - Indent stream auto indents every line written to it + """A write-only stream wrapper that automatically indents every line. + + Tracks a current indentation depth and prepends that many spaces to the + start of each new line. Use indent() and dedent() to change the depth. + + Attributes: + stream: The underlying writable stream. + linestart: True when the next character written begins a new line. + depth: Current indentation level in spaces. """ def __init__(self, stream): + """Initialize an IndentStream wrapping the given stream. + + Args: + stream: A filename string or writable file object to wrap. + """ self.stream = open_stream(stream, "w") self.linestart = True self.depth = 0 def indent(self, num=2): + """Increase the indentation depth. + + Args: + num: Number of spaces to add (default 2). + """ self.depth += num def dedent(self, num=2): + """Decrease the indentation depth, clamped to zero. + + Args: + num: Number of spaces to remove (default 2). + """ self.depth -= num if self.depth < 0: self.depth = 0 def write(self, text): + """Write text to the underlying stream, prepending indentation as needed. + + Args: + text: The string to write. + """ lines = text.split("\n") for line in lines[:-1]: @@ -1473,7 +1827,18 @@ def replace_ext(filename, oldext, newext): def sortrank(lst, cmp=None, key=None, reverse=False): - """Returns the ranks of items in lst""" + """Return the indices that would sort lst. + + Args: + lst: The list to rank. + cmp: Comparison function (deprecated; ignored if key is provided). + key: A one-argument function to extract a comparison key from + each list element (default identity). + reverse: If True, sort in descending order (default False). + + Returns: + A list of integer indices such that [lst[i] for i in result] is sorted. 
+ """ ind = list(range(len(lst))) if key is None: @@ -1497,7 +1862,14 @@ def sort_together(compare, lst, *others): sortTogether = sort_together def invperm(perm): - """Returns the inverse of a permutation 'perm'""" + """Return the inverse of a permutation. + + Args: + perm: A list of unique integers 0..n-1 representing a permutation. + + Returns: + A list inv such that inv[perm[i]] == i for all i. + """ inv = [0] * len(perm) for i in range(len(perm)): inv[perm[i]] = i @@ -1511,14 +1883,33 @@ def invperm(perm): # def oneNorm(vals): - """Normalize values so that they sum to 1""" + """Normalize a list of values so that they sum to 1. + + Args: + vals: A list or iterable of numeric values. + + Returns: + A list of values each divided by the total sum. + """ s = float(sum(vals)) return [x/s for x in vals] def bucketSize(array, ndivs=None, low=None, width=None): - """Determine the bucket size needed to divide the values in array into - 'ndivs' evenly sized buckets""" + """Determine bucket parameters for dividing array values into bins. + + Exactly one of ndivs or width should be supplied (or neither, which + defaults to ndivs=20). The other value is derived from the data. + + Args: + array: A sequence of numeric values. + ndivs: Desired number of bins. Derived from width if not given. + low: Lower bound for binning. Defaults to min(array). + width: Desired bin width. Derived from ndivs if not given. + + Returns: + A tuple (ndivs, low, width) with all three values resolved. + """ if low is None: low = min(array) @@ -1545,7 +1936,20 @@ def bucketBin(item, ndivs, low, width): def bucket(array, ndivs=None, low=None, width=None, key=lambda x: x): - """Group elements of 'array' into 'ndivs' lists""" + """Group elements of array into ndivs buckets. + + Args: + array: A sequence of items to bucket. + ndivs: Number of buckets (inferred if not given). + low: Lower bound for the first bucket (default min of key values). + width: Bucket width (inferred if not given). 
+ key: Function to extract a numeric comparison key from each item + (default identity). + + Returns: + A tuple (x, h) where x is a list of bucket lower-bound values and + h is a list of lists containing the array items in each bucket. + """ keys = map(key, array) @@ -1566,7 +1970,18 @@ def bucket(array, ndivs=None, low=None, width=None, key=lambda x: x): def hist(array, ndivs=None, low=None, width=None): - """Create a histogram of 'array' with 'ndivs' buckets""" + """Create a histogram of array values. + + Args: + array: A sequence of numeric values. + ndivs: Number of histogram bins (default 20 if width is also None). + low: Lower bound of the first bin. Defaults to min(array). + width: Bin width (inferred from ndivs if not given). + + Returns: + A tuple (x, h) where x is a list of bin lower-bound values and + h is a list of integer counts for each bin. + """ # set bucket sizes ndivs, low, width = bucketSize(array, ndivs, low, width) @@ -1590,7 +2005,22 @@ def hist2(array1, array2, ndivs1=None, ndivs2=None, low1=None, low2=None, width1=None, width2=None): - """Perform a 2D histogram""" + """Perform a 2D histogram over two arrays. + + Args: + array1: First sequence of numeric values (mapped to columns). + array2: Second sequence of numeric values (mapped to rows). + ndivs1: Number of bins for array1 (default derived from data). + ndivs2: Number of bins for array2 (default derived from data). + low1: Lower bound for array1 bins. Defaults to min(array1). + low2: Lower bound for array2 bins. Defaults to min(array2). + width1: Bin width for array1 (inferred if not given). + width2: Bin width for array2 (inferred if not given). + + Returns: + A tuple (labels, h) where labels is a 2D list of [x, y] bin + coordinates and h is a 2D list of integer counts. 
+ """ # set bucket sizes @@ -1615,8 +2045,14 @@ def hist2(array1, array2, def histbins(bins): - """Adjust the bins from starts to centers, this will allow GNUPLOT to plot - histograms correctly""" + """Convert bin start positions to bin center positions for GNUPLOT plotting. + + Args: + bins: A list of bin start positions. + + Returns: + A list of bin center positions the same length as bins. + """ bins2 = [] @@ -1631,7 +2067,21 @@ def histbins(bins): def distrib(array, ndivs=None, low=None, width=None): - """Find the distribution of 'array' using 'ndivs' buckets""" + """Compute the probability density distribution of array. + + Normalises histogram counts by the total number of items and bin width, + giving an approximate PDF. + + Args: + array: A sequence of numeric values. + ndivs: Number of bins (default derived from data). + low: Lower bound of the first bin. Defaults to min(array). + width: Bin width (inferred if not given). + + Returns: + A tuple (x, h) where x is bin lower-bound values and h is a list + of density values (count / total / width). + """ # set bucket sizes ndivs, low, width = bucketSize(array, ndivs, low, width) @@ -1674,6 +2124,17 @@ def hist_dict(array): def print_hist(array, ndivs=20, low=None, width=None, cols=75, spacing=2, out=sys.stdout): + """Print a text-based histogram with ASCII bar chart. + + Args: + array: A sequence of numeric values to histogram. + ndivs: Number of bins (default 20). + low: Lower bound for the first bin. Defaults to min(array). + width: Bin width (inferred if not given). + cols: Total character width of the output including bars (default 75). + spacing: Number of spaces between columns (default 2). + out: Output stream (default sys.stdout). + """ data = list(hist(array, ndivs, low=low, width=width)) # find max bar