diff --git a/codeface/R/cluster/community_metrics.r b/codeface/R/cluster/community_metrics.r index 4c76cfc2..8d06bff4 100644 --- a/codeface/R/cluster/community_metrics.r +++ b/codeface/R/cluster/community_metrics.r @@ -757,6 +757,7 @@ compute.project.graph.trends <- revision.data[sapply(revision.data, is.null)] <- NULL ## Create igraph object and select communities which are of a minimum size 4 + loginfo("Compute communities") revision.data <- mclapply(revision.data, mc.cores=n.cores, function(rev) { @@ -781,6 +782,7 @@ compute.project.graph.trends <- revision.data[sapply(revision.data, is.null)] <- NULL ## Compute network metrics + loginfo("Compute network metrics") revision.df.list <- mclapply(revision.data, mc.cores=n.cores, function(rev) { @@ -848,8 +850,34 @@ plot.influence.ts <- function(project.stats) { return(plot1) } +build.filenames <- function(outdir, project.name, type.name, analysis.method) { + plot.organization <- c("project", "plot_type") + + filenames <- sapply(plot.organization, + function(p.org) { + if(p.org == "project") { + folder.name <- paste(project.name, "_", + analysis.method, sep="") + plot.name <- paste(type.name, ".png", sep="") + } + else if (p.org == "plot_type") { + folder.name <- paste(type.name, "_", + analysis.method, sep="") + plot.name <- paste(project.name, ".png", sep="") + } + output.path <- file.path(outdir, p.org, + folder.name) + dir.create(output.path, recursive=T) + filename <- file.path(output.path, plot.name) + return(filename) + }) + + return(filenames) +} plot.box <- function(project.df, feature, outdir) { + loginfo("Plotting box plot for feature %s", feature) + ## Select all rows for the feature keep.row <- project.df$metric == feature project.df <- project.df[keep.row,] @@ -871,13 +899,17 @@ plot.box <- function(project.df, feature, outdir) { ylim1[1] <- 0 p1 = p0 + coord_cartesian(ylim = ylim1*1.05) - file.dir <- paste(outdir, "/", project.name, "_", analysis.method, sep="") - dir.create(file.dir, recursive=T) - file.name <- paste(file.dir, "/", feature, ".png",sep="") - ggsave(file.name, p1, height=8, width=20) + file.names <- build.filenames(outdir, project.name, feature, + analysis.method) + + sapply(file.names, + function(filename) ggsave(filename, p1, height=8, + width=20)) ## Adjusted box plots for skewed data - file.name <- paste(file.dir, "/", feature, "_adjusted.pdf", sep="") + file.names <- build.filenames(outdir, project.name, + paste(feature, "_adjusted.pdf", sep=""), + analysis.method) pdf(file.name) @@ -889,16 +921,18 @@ plot.box <- function(project.df, feature, outdir) { dev.off() if(feature %in% c('page.rank','v.degree')) { - file.name <- paste(file.dir, '/', feature, "_distribution.pdf", sep="") p2 <- ggplot(project.df, aes(x=value)) + geom_histogram(aes(y=..density..),colour="black", fill="white") + geom_density(alpha=.2, fill="#FF6666") - ggsave(file.name, p2, height=8, width=20) + sapply(file.names, + function(file.name) ggsave(file.name, p2, height=8, width=20)) } } } plot.series <- function(project.df, feature, outdir) { + loginfo("Plot time series for feature %s", feature) + ## Select all rows for the feature keep.row <- project.df$metric %in% feature project.df <- project.df[keep.row,] @@ -925,10 +959,10 @@ plot.series <- function(project.df, feature, outdir) { strip.text.x = element_text(size=15)) } - file.dir <- paste(outdir, "/", project.name, "_", analysis.method, sep="") - dir.create(file.dir, recursive=T) - file.name <- paste(file.dir, "/time_series_metrics.png",sep="") - ggsave(file.name, p, height=41, width=20) + file.names <- build.filenames(outdir, project.name, "time_series", + analysis.method) + sapply(file.names, + function(file.name) ggsave(file.name, p, height=41, width=20)) } @@ -956,10 +990,12 @@ plot.scatter <- function(project.df, feature1, feature2, outdir) { facet_wrap( ~ cycle) + geom_smooth(method="lm") - file.dir <- paste(outdir, "/", project.name, "_", analysis.method, sep="") - dir.create(file.dir, recursive=T) - file.name <- paste(file.dir, "/", feature1, "_vs_", feature2, ".png",sep="") - ggsave(file.name, p, height=40, width=40) + feature <- paste(feature1, "_vs_", feature2, sep="") + file.names <- build.filenames(outdir, project.name, feature, + analysis.method) + + sapply(file.names, + function(file.name) ggsave(file.name, p, height=40, width=40)) } } @@ -1002,15 +1038,17 @@ write.plots.trends <- function(trends, markov.chains, developer.classifications, 'num.power.law', 'edge.vert.ratio') - ## Generate and save box plots for each project + loginfo("Saving box plots") dlply(trends, .(p.id), function(df) sapply(metrics.box, function(m) plot.box(df, m, outdir))) ## Generate and save series plots + loginfo("Saving time series") dlply(trends, .(p.id), function(df) plot.series(df, metrics.series, outdir)) ## Gernerate scatter plots + loginfo("Saving scatter plots") dlply(trends, .(p.id), function(df) plot.scatter(df, "v.degree", "cluster.coefficient", outdir)) @@ -1018,8 +1056,10 @@ write.plots.trends <- function(trends, markov.chains, developer.classifications, analysis.method <- unique(trends$analysis.method) file.dir <- paste(outdir, "/", project.name, "_", analysis.method, sep="") + dir.create(file.dir, recursive=T) ## Save markov chain plot + loginfo("Saving Markov chains") if(!is.null(markov.chains)) { chain.types <- names(markov.chains) for (type in chain.types) { @@ -1037,6 +1077,7 @@ write.plots.trends <- function(trends, markov.chains, developer.classifications, } ## Save data to file + loginfo("Save data files") data <- list(trends=trends,markov.chains=markov.chains, developer.classifications= developer.classifications, class.edge.probs=class.edge.probs, diff --git a/codeface/R/cluster/persons.r b/codeface/R/cluster/persons.r index 44b33fdb..fbfffdca 100755 --- a/codeface/R/cluster/persons.r +++ b/codeface/R/cluster/persons.r @@ -1356,16 +1356,3 @@ test.community.quality.modularity <- function() { quality <- community.metric(g, g.clust, "modularization") } - -######################################################################### -## Executed Statements -######################################################################### -##---------------------------- -## Parse commandline arguments -##---------------------------- - -config.script.run({ - conf <- config.from.args(positional.args=list("resdir", "range.id"), - require.project=TRUE) - performAnalysis(conf$resdir, conf) -}) diff --git a/codeface/R/cluster/run_analysis.r b/codeface/R/cluster/run_analysis.r new file mode 100755 index 00000000..2fce2143 --- /dev/null +++ b/codeface/R/cluster/run_analysis.r @@ -0,0 +1,31 @@ +#! /usr/bin/env Rscript +## Analyse the developer connections + +## This file is part of Codeface. Codeface is free software: you can +## redistribute it and/or modify it under the terms of the GNU General Public +## License as published by the Free Software Foundation, version 2. +## +## This program is distributed in the hope that it will be useful, but WITHOUT +## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +## FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +## details. +## +## You should have received a copy of the GNU General Public License +## along with this program; if not, write to the Free Software +## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +## +## Copyright 2010, 2011 by Wolfgang Mauerer +## Copyright 2012, 2013, Siemens AG, Wolfgang Mauerer +## All Rights Reserved. + +source("persons.r") + +##---------------------------- +## Parse commandline arguments +##---------------------------- + +config.script.run({ + conf <- config.from.args(positional.args=list("resdir", "range.id"), + require.project=TRUE) + performAnalysis(conf$resdir, conf) +}) diff --git a/codeface/R/network_stream.r b/codeface/R/network_stream.r index 5fad97b7..5b8e3d29 100755 --- a/codeface/R/network_stream.r +++ b/codeface/R/network_stream.r @@ -112,6 +112,8 @@ build.dev.net.stream <- function(con, project.id, type, dates.df, construct.edgelist <- function(commit.list, add.co.change.rel, add.semantic.rel) { + if(length(commit.list$commit.df) == 0) return(list()) + ## Compute relation for developer contribution to common entity entity.groups <- aggregate.on.common.entity(commit.list$commit.df) diff --git a/codeface/R/semantic_dependency.r b/codeface/R/semantic_dependency.r index cb44cfdb..651bf756 100644 --- a/codeface/R/semantic_dependency.r +++ b/codeface/R/semantic_dependency.r @@ -184,9 +184,11 @@ computeSemanticCoupling <- function(depend.df, threshold=0.5) { tdm <- processTermDocMat(corp) ## Compute document similarity using latent semantic analysis + loginfo("Computing document similarity") dist.mat <- computeDocSimilarity(tdm) ## Remove documents that have low similarity + loginfo("Remove dissimilar documents") edgelist <- cmpfun(getSimDocIds)(dist.mat, threshold) ## Mapping of document ids to document names @@ -196,5 +198,7 @@ computeSemanticCoupling <- function(depend.df, threshold=0.5) { res <- list(edgelist=edgelist, vertex.data=vertex.data) + loginfo("Finished semantic similarity computation") + return(res) } diff --git a/codeface/dbmanager.py b/codeface/dbmanager.py index 7d84bf9b..93f1f8d0 100644 --- a/codeface/dbmanager.py +++ b/codeface/dbmanager.py @@ -64,7 +64,7 @@ def __init__(self, conf): raise self.cur = self.con.cursor() - max_packet_size = 1024 * 1024 * 256 + max_packet_size = 1024 * 1024 * 512 self.doExec("SET GLOBAL max_allowed_packet=%s", (max_packet_size,)) def __del__(self): @@ -85,7 +85,9 @@ def doExec(self, stmt, args=None): if dbe.args[0] == 1213: # Deadlock! retry... log.warning("Recoverable deadlock in MySQL - retrying.") elif dbe.args[0] == 2006: # Server gone away... - log.warning("MySQL Server gone away, trying to reconnect.") + log.warning("MySQL Server gone away, trying to " + "reconnect. If warning persists, try " + "increasing the max_allowed_packet size.") self.con.ping(True) elif dbe.args[0] == 2013: # Lost connection to MySQL server during query... log.warning("Lost connection to MySQL server during query, trying to reconnect.") diff --git a/codeface/project.py b/codeface/project.py index abb0330b..e37114bb 100644 --- a/codeface/project.py +++ b/codeface/project.py @@ -114,7 +114,7 @@ def project_analyse(resdir, gitdir, codeface_conf, project_conf, ######### # STAGE 2: Cluster analysis - exe = abspath(resource_filename(__name__, "R/cluster/persons.r")) + exe = abspath(resource_filename(__name__, "R/cluster/run_analysis.r")) cwd, _ = pathsplit(exe) cmd = [] cmd.append(exe)