Skip to content

Commit 10c1152

Browse files
committed
changed processTransposeBatch to prune duplicate arcs
1 parent e0ae0c7 commit 10c1152

File tree

1 file changed

+46
-18
lines changed

1 file changed

+46
-18
lines changed

src/it/unimi/dsi/webgraph/Transform.java

+46-18
Original file line numberDiff line numberDiff line change
@@ -1199,7 +1199,7 @@ public int[] successorArray() {
11991199
final int numPairs = this.numPairs;
12001200
// Neither quicksort nor heaps are stable, so we reestablish order here.
12011201
IntArrays.quickSort(successor, 0, numPairs);
1202-
if (numPairs!= 0) {
1202+
if (numPairs != 0) {
12031203
int p = 0;
12041204
for (int j = 1; j < numPairs; j++) if (successor[p] != successor[j]) successor[++p] = successor[j];
12051205
outdegree = p + 1;
@@ -1311,6 +1311,8 @@ class InternalArcLabelledNodeIterator extends ArcLabelledNodeIterator {
13111311
private int last;
13121312
/** The outdegree of the current node (valid if {@link #last} is not -1). */
13131313
private int outdegree;
1314+
/** The number of pairs associated with the current node (valid if {@link #last} is not -1). */
1315+
private int numPairs;
13141316
/** The successors of the current node (valid if {@link #last} is not -1);
13151317
* only the first {@link #outdegree} entries are meaningful. */
13161318
private int[] successor;
@@ -1319,7 +1321,7 @@ class InternalArcLabelledNodeIterator extends ArcLabelledNodeIterator {
13191321
private Label[] label;
13201322

13211323
public InternalArcLabelledNodeIterator(final int upperBound) throws IOException {
1322-
this(upperBound, null, null, null, null, null, -1, 0, IntArrays.EMPTY_ARRAY, Label.EMPTY_LABEL_ARRAY);
1324+
this(upperBound, null, null, null, null, null, -1, -1, IntArrays.EMPTY_ARRAY, Label.EMPTY_LABEL_ARRAY);
13231325
}
13241326

13251327
public InternalArcLabelledNodeIterator(final int upperBound, final InputBitStream[] baseIbs, final InputBitStream[] baseLabelInputBitStream, final int[] refArray, final int[] prevTarget, final int[] inputStreamLength, final int last, final int outdegree, final int successor[], final Label[] label) throws IOException {
@@ -1377,8 +1379,10 @@ public boolean hasNext() {
13771379

13781380
@Override
13791381
public int nextInt() {
1382+
if (! hasNext()) throw new NoSuchElementException();
13801383
last++;
13811384
int d = 0;
1385+
outdegree = -1;
13821386
int i;
13831387

13841388
try {
@@ -1395,8 +1399,8 @@ public int nextInt() {
13951399
if (--inputStreamLength[i] == 0) {
13961400
queue.dequeue();
13971401
batchIbs[i].close();
1398-
labelInputBitStream[i].close();
13991402
batchIbs[i] = null;
1403+
labelInputBitStream[i].close();
14001404
labelInputBitStream[i] = null;
14011405
}
14021406
else {
@@ -1410,8 +1414,19 @@ public int nextInt() {
14101414
}
14111415
d++;
14121416
}
1417+
1418+
numPairs = d;
1419+
}
1420+
catch(final IOException e) {
1421+
e.printStackTrace();
1422+
throw new RuntimeException(this + " " + e);
1423+
}
1424+
1425+
// Compute outdegree
1426+
if (outdegree == -1) {
1427+
final int numPairs = this.numPairs;
14131428
// Neither quicksort nor heaps are stable, so we reestablish order here.
1414-
it.unimi.dsi.fastutil.Arrays.quickSort(0, d, (x, y) -> Integer.compare(successor[x], successor[y]),
1429+
it.unimi.dsi.fastutil.Arrays.quickSort(0, numPairs, (x, y) -> Integer.compare(successor[x], successor[y]),
14151430
(x, y) -> {
14161431
final int t = successor[x];
14171432
successor[x] = successor[y];
@@ -1420,12 +1435,16 @@ public int nextInt() {
14201435
label[x] = label[y];
14211436
label[y] = l;
14221437
});
1423-
}
1424-
catch(final IOException e) {
1425-
throw new RuntimeException(e);
1438+
1439+
if (numPairs != 0) {
1440+
// Avoid returning the duplicate arcs
1441+
int p = 0;
1442+
for (int j = 1; j < numPairs; j++) if (successor[p] != successor[j]) successor[++p] = successor[j];
1443+
outdegree = p + 1;
1444+
}
1445+
else outdegree = 0;
14261446
}
14271447

1428-
outdegree = d;
14291448
return last;
14301449
}
14311450

@@ -1604,6 +1623,12 @@ public static int processTransposeBatch(final int n, final int[] source, final i
16041623
batchFile.deleteOnExit();
16051624
batches.add(batchFile);
16061625
final OutputBitStream batch = new OutputBitStream(batchFile);
1626+
1627+
final File labelFile = File.createTempFile("label-", ".bits", tempDir);
1628+
labelFile.deleteOnExit();
1629+
labelBatches.add(labelFile);
1630+
final OutputBitStream labelObs = new OutputBitStream(labelFile);
1631+
16071632
int u = 0;
16081633

16091634
if (n != 0) {
@@ -1616,32 +1641,35 @@ public static int processTransposeBatch(final int n, final int[] source, final i
16161641
batch.writeDelta(prevSource);
16171642
batch.writeDelta(target[0]);
16181643

1644+
labelBitStream.position(start[0]);
1645+
prototype.fromBitStream(labelBitStream, source[0]);
1646+
prototype.toBitStream(labelObs, target[0]);
1647+
16191648
for(int i = 1; i < n; i++) {
16201649
if (source[i] != prevSource) {
16211650
batch.writeDelta(source[i] - prevSource);
16221651
batch.writeDelta(target[i]);
16231652
prevSource = source[i];
1653+
1654+
labelBitStream.position(start[i]);
1655+
prototype.fromBitStream(labelBitStream, source[i]);
1656+
prototype.toBitStream(labelObs, target[i]);
16241657
}
16251658
else if (target[i] != target[i - 1]) {
16261659
// We don't write duplicate pairs
16271660
batch.writeDelta(0);
16281661
batch.writeDelta(target[i] - target[i - 1] - 1);
1662+
1663+
labelBitStream.position(start[i]);
1664+
prototype.fromBitStream(labelBitStream, source[i]);
1665+
prototype.toBitStream(labelObs, target[i]);
16291666
}
16301667
}
16311668
}
1669+
16321670
else batch.writeDelta(0);
16331671

16341672
batch.close();
1635-
1636-
final File labelFile = File.createTempFile("label-", ".bits", tempDir);
1637-
labelFile.deleteOnExit();
1638-
labelBatches.add(labelFile);
1639-
final OutputBitStream labelObs = new OutputBitStream(labelFile);
1640-
for (int i = 0; i < n; i++) {
1641-
labelBitStream.position(start[i]);
1642-
prototype.fromBitStream(labelBitStream, source[i]);
1643-
prototype.toBitStream(labelObs, target[i]);
1644-
}
16451673
labelObs.close();
16461674

16471675
return u;

0 commit comments

Comments
 (0)