Skip to content

Commit 7b12013

Browse files
authored
Created script
1 parent 5503e4c commit 7b12013

File tree

2 files changed

+248
-1
lines changed

2 files changed

+248
-1
lines changed

README.md

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,31 @@
11
# pdf-diff
2-
A tool for visualizing differences between two pdf files.
2+
A tool for visualizing differences between two PDF files. It is mainly intended for editors who spend many hours working on several PDFs.
3+
4+
## STATUS: Work in progress
5+
6+
## Foreword
7+
8+
I use [InDesign](https://www.adobe.com/it/products/indesign.html) almost daily, and its pagination and convenient graphical interface make it number one among desktop publishing programs. InDesign, like many other graphics programs, has one flaw: because it is not based on any versioning tool, it is difficult to compare two versions of the same file.
9+
10+
I sometimes have to retouch the files I produce, be they resumes, books or technical manuals. While editing a resume is very easy, editing large volumes is much more difficult. Several times, when sharing the resulting PDFs with my team, we could not clearly see the differences between one version and another. This is compounded by human error: with more than 50-60 pages to review, it is impossible to keep track of all the changes between versions!
11+
12+
Therefore, I used the powerful Go programming language to develop a new tool called `pdf-diff`. pdf-diff creates images that show exactly where the PDF has changed, thus displaying the changes from one version to another.
13+
14+
## How it works
15+
16+
From a technical point of view, the tool is very simple. pdf-diff uses `pdftoppm` to generate a series of images from the PDFs to be compared (one for each page). It then uses a very basic pixel comparison algorithm to draw red rectangles that highlight the differences between one PDF and the other. The Go script also uses Go's very powerful native image encoding/decoding packages (which I personally was not familiar with!). I was very impressed with what it is possible to do with Go in just a few lines of code.
17+
18+
The code is not very clean and can certainly be optimized. I am asking someone much more knowledgeable than me in graphics whether it is possible to create a simple algorithm that applies a background color only locally, and not to the whole row in which a pixel has changed.
19+
20+
## How to use
21+
22+
work in progress
23+
24+
### Contact
25+
26+
If you wish to use this for your project, go ahead. If you have any issues or improvements, feel free to open a new issue. Lastly, if you have a good algorithm to implement, or just want to discuss other tools for editors, you can [email me](mailto:hi@serhack.me).
27+
28+
#### Donation
29+
30+
Monero: `47VFueCo1yvc6nq688QsBt9UZSrg5z2JLFUwWFs4WtHBSwDsybDbnmLiydo46ybPeqSMxypnjmz5pdz87t4VjngfQfmMd4S`
31+
Bitcoin: `1Pt3YwkFoexAA3s9pV3saoJ2EAXzpqBmrp`

main.go

Lines changed: 218 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,218 @@
1+
package main
2+
3+
import (
4+
"crypto/sha256"
5+
"errors"
6+
"fmt"
7+
"image"
8+
"image/color"
9+
"image/png"
10+
"io"
11+
"os"
12+
"os/exec"
13+
"strconv"
14+
"strings"
15+
)
16+
17+
// Pixel holds the red, green, blue and alpha channels of a single image pixel.
//
// The channels are stored as float64 rather than as integers so that they can
// be multiplied by fractional factors (as drawSection does) without
// intermediate conversions.
type Pixel struct {
	r, g, b, a float64
}
21+
22+
func rgbaToPixel(r uint32, g uint32, b uint32, a uint32) Pixel{
23+
return Pixel{float64(r), float64(g), float64(b), float64(a)}
24+
}
25+
26+
27+
func CreatePNG(PDFPath string) {
28+
29+
fmt.Println("Image generation for: " + PDFPath)
30+
31+
// Computes the sha256 hash
32+
folderName := ComputeSha256(PDFPath)
33+
34+
// Checks if a folder with the name sha256(file) already exists
35+
if _, err := os.Stat(folderName); err == nil {
36+
return
37+
}
38+
39+
// If not, probably we never met this pdf. Create the folder
40+
err := os.Mkdir(folderName, os.ModePerm)
41+
if err != nil {
42+
panic(err)
43+
}
44+
45+
// Create the images
46+
cmd, _ := exec.Command("pdftoppm", "-png", PDFPath, folderName+"/png_gen").Output()
47+
fmt.Println(cmd)
48+
}
49+
50+
func RetrievePixel(fileName string) ([][]Pixel, int, int) {
51+
infile, err := os.Open(fileName)
52+
if err != nil {
53+
panic(err)
54+
}
55+
defer infile.Close()
56+
57+
img, _, err := image.Decode(infile)
58+
if err != nil {
59+
panic(err)
60+
}
61+
62+
bounds := img.Bounds()
63+
width, height := bounds.Max.X, bounds.Max.Y
64+
var pixels [][]Pixel
65+
for y := bounds.Min.Y; y < height; y++ {
66+
var row []Pixel
67+
for x := bounds.Min.X; x < width; x++ {
68+
row = append(row, rgbaToPixel(img.At(x, y).RGBA()))
69+
}
70+
pixels = append(pixels, row)
71+
}
72+
return pixels, width, height
73+
}
74+
75+
func drawSection(row []Pixel) {
76+
for i := 0; i < len(row); i++ {
77+
row[i].g = row[i].g * 0.7
78+
row[i].b = row[i].b * 0.9
79+
}
80+
}
81+
82+
func CompareSingleImage(path1 string, path2 string, i int) {
83+
84+
sha1 := ComputeSha256(path1)
85+
sha2 := ComputeSha256(path2)
86+
87+
// If the two images have the same hash, the two pages are the same.
88+
if sha1 == sha2{
89+
fmt.Printf("The pages number %d are the same.\n", i)
90+
return
91+
}
92+
93+
pixel_1, x_1, y_1 := RetrievePixel(path1)
94+
pixel_2, x_2, y_2 := RetrievePixel(path2)
95+
96+
if x_1 != x_2 {
97+
if y_1 != y_2 {
98+
fmt.Println("Warning: comparing two pdfs that do not have the same dimensions might cause some problems.")
99+
}
100+
}
101+
102+
pixel_3 := pixel_2
103+
104+
for y := 0; y < len(pixel_1); y++ {
105+
for x := 0; x < len(pixel_1[y]); x++ {
106+
result := compareSinglePixel(pixel_1[y][x], pixel_2[y][x])
107+
if !result {
108+
drawSection(pixel_3, y)
109+
}
110+
}
111+
}
112+
113+
img := image.NewNRGBA(image.Rect(0, 0, x_1, y_1))
114+
for y := 0; y < y_1; y++ {
115+
for x := 0; x < x_1; x++ {
116+
img.Set(x, y, color.RGBA{
117+
R: uint8(pixel_3[y][x].r),
118+
G: uint8(pixel_3[y][x].g),
119+
B: uint8(pixel_3[y][x].b),
120+
A: uint8(pixel_3[y][x].a),
121+
})
122+
}
123+
}
124+
125+
// Create the file under "generated" folder
126+
f, err := os.Create("generated/image-" + strconv.Itoa(i) + ".png")
127+
if err != nil {
128+
panic(err)
129+
}
130+
131+
// Encode the image
132+
if err := png.Encode(f, img); err != nil {
133+
f.Close()
134+
panic(err)
135+
}
136+
137+
if err := f.Close(); err != nil {
138+
panic(err)
139+
}
140+
141+
}
142+
143+
func compareSinglePixel(image1 Pixel, image2 Pixel) bool {
144+
// Returns true if two pixel are the same pixel
145+
if image1.b == image2.b && image1.g == image2.g && image1.r == image2.r && image1.a == image2.a {
146+
return true
147+
}
148+
return false
149+
}
150+
151+
// ComputeSha256 returns the SHA-256 hash of the file at filePath as a
// lowercase hexadecimal string.
//
// The file is streamed through the hash, so it is never loaded into memory
// as a whole. It panics if the file cannot be opened or read.
func ComputeSha256(filePath string) string {
	f, err := os.Open(filePath)
	if err != nil {
		panic(err)
	}
	defer f.Close()

	h := sha256.New()
	if _, err := io.Copy(h, f); err != nil {
		panic(err)
	}

	return fmt.Sprintf("%x", h.Sum(nil))
}
166+
167+
func Compare(PDF1 string, PDF2 string) {
168+
// Compares the two files
169+
170+
shaPDF1 := ComputeSha256(PDF1)
171+
shaPDF2 := ComputeSha256(PDF2)
172+
173+
err := os.Mkdir("generated", os.ModePerm)
174+
if err != nil {
175+
panic(err)
176+
}
177+
178+
i := 1
179+
k := 1
180+
for {
181+
// pdftoppm creates pngs and the numbers are padded with a variable numbers of 0.
182+
// e.g. pdf contains <= 99 pages => 01.. 02.. 03..
183+
// pdf contains <= 999 pages => 001.. 002.. 003
184+
185+
o := fmt.Sprintf("%d", k)
186+
s := fmt.Sprintf("%0" + o + "d", i)
187+
188+
s_pdf1 := shaPDF1 + "/png_gen-" + s + ".png"
189+
s_pdf2 := shaPDF2 + "/png_gen-" + s + ".png"
190+
191+
if _, err := os.Stat(s_pdf1); errors.Is(err, os.ErrNotExist) {
192+
// TODO: remove this println
193+
fmt.Println("File " + s_pdf1 + " does not exist.")
194+
k++
195+
if k == 12{
196+
break
197+
}
198+
} else {
199+
CompareSingleImage(s_pdf1, s_pdf2, i)
200+
i++
201+
}
202+
203+
}
204+
205+
}
206+
207+
func main(){
208+
fmt.Println("pdf-diff: highlights the differences between two pdf files.")
209+
if len(os.Args) < 2 {
210+
fmt.Println("You need to specify two parameters!")
211+
}
212+
213+
CreatePNG(os.Args[1])
214+
CreatePNG(os.Args[2])
215+
216+
Compare(os.Args[1], os.Args[2])
217+
218+
}

0 commit comments

Comments
 (0)