forked from cabservicesag/fixMixedEncoding
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfix_mixed_encoding.php
More file actions
106 lines (104 loc) · 2.42 KB
/
fix_mixed_encoding.php
File metadata and controls
106 lines (104 loc) · 2.42 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
<?php
/**
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* @author Jonas Felix <jf@cabag.ch>
* @author Tizian Schmidlin <st@cabag.ch>
* @package fix_mixed_encoding
*/
/**
* This script was originally cloned from https://github.com/cabservicesag/fixMixedEncoding.git
*/
// Show usage and stop unless an existing input file AND an output path were
// both given. isset() keeps a missing argument from being read at all.
if (!isset($argv[1], $argv[2]) || $argv[2] === '' || !is_file($argv[1])) {
    echo "\nUsage: > php fix_mixed_encoding.php inputfile.sql outputfile.sql\n";
    // Non-zero status so a calling script can tell that nothing was converted.
    exit(1);
}
/**
 * Encodes one Unicode code point as a UTF-8 byte string.
 *
 * Private helper of fixMixedCharacters(); only code points up to U+FFFF
 * (one to three UTF-8 bytes) are supported.
 *
 * @param int $codePoint Unicode code point in the range 0..0xFFFF
 * @return string the UTF-8 encoding of that code point
 */
function fixMixedEncodingEncodeUtf8($codePoint) {
    if ($codePoint < 0x80) {
        return chr($codePoint);
    }
    if ($codePoint < 0x800) {
        return chr(0xC0 | ($codePoint >> 6))
            . chr(0x80 | ($codePoint & 0x3F));
    }
    return chr(0xE0 | ($codePoint >> 12))
        . chr(0x80 | (($codePoint >> 6) & 0x3F))
        . chr(0x80 | ($codePoint & 0x3F));
}

/**
 * Replaces damaged (double-encoded) UTF-8 characters with the correct ones.
 *
 * A damaged character is what remains when the UTF-8 bytes of a character
 * were read as Windows-1252 and then encoded as UTF-8 a second time, e.g.
 * "ü" (C3 BC) turning into "Ã¼" (C3 83 C2 BC).
 *
 * The search/replace table is derived from the list of characters below
 * instead of being spelled out as string literals, so it does not depend on
 * the encoding this source file is saved in, and every entry (including "Ê",
 * whose replacement used to be an empty string) gets a proper replacement.
 *
 * All replacements happen in a single pass over the input (strtr), so text
 * produced by one replacement is never matched again by another one.
 *
 * @param string $string text that may contain damaged characters
 * @return string the text with every known damaged sequence repaired;
 *                everything else is returned unchanged
 */
function fixMixedCharacters($string) {
    // The table never changes, so it is built on the first call only.
    static $searchReplace = null;

    if ($searchReplace === null) {
        // Code points of the characters that get repaired.
        $codePoints = array(
            0xFC, 0xE4, 0xF6, 0xD6, 0xDF,             // ü ä ö Ö ß
            0xE0, 0xE1, 0xE2, 0xE3,                   // à á â ã
            0xF9, 0xFA, 0xFB,                         // ù ú û
            0xD9, 0xDA, 0xDB, 0xDC,                   // Ù Ú Û Ü
            0xF2, 0xF3, 0xF4,                         // ò ó ô
            0xE8, 0xE9, 0xEA, 0xEB,                   // è é ê ë
            0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5,       // À Á Â Ã Ä Å
            0xC7, 0xC8, 0xC9, 0xCA, 0xCB,             // Ç È É Ê Ë
            0xCC, 0xCD, 0xCE, 0xCF,                   // Ì Í Î Ï
            0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD8,       // Ñ Ò Ó Ô Õ Ø
            0xE5, 0xE6, 0xE7,                         // å æ ç
            0xEC, 0xED, 0xEE, 0xEF,                   // ì í î ï
            0xF0, 0xF1, 0xF5, 0xF8, 0xFD, 0xFF,       // ð ñ õ ø ý ÿ
            0x20AC,                                   // €
        );

        // Code points Windows-1252 assigns to the bytes 0x80..0x9F; all other
        // bytes have the code point equal to the byte value.
        // NOTE(review): the five bytes Windows-1252 leaves undefined (81, 8D,
        // 8F, 90, 9D) are taken as the control characters of the same value,
        // which is how MySQL's latin1 treats them - confirm against real dumps.
        $windows1252 = array(
            0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
            0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x017D, 0x008F,
            0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
            0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178,
        );

        $searchReplace = array();
        foreach ($codePoints as $codePoint) {
            $correct = fixMixedEncodingEncodeUtf8($codePoint);

            // Re-create the damage: read every byte of the correct character
            // as one Windows-1252 character and encode that as UTF-8 again.
            $damaged = '';
            $length = strlen($correct);
            for ($i = 0; $i < $length; $i++) {
                $byte = ord($correct[$i]);
                if ($byte >= 0x80 && $byte <= 0x9F) {
                    $byte = $windows1252[$byte - 0x80];
                }
                $damaged .= fixMixedEncodingEncodeUtf8($byte);
            }

            $searchReplace[$damaged] = $correct;
        }
    }

    return strtr($string, $searchReplace);
}
// Resolve the two paths; the output path is re-checked here so this part
// never touches an argument that was not given.
$inputPath = $argv[1];
$outputPath = isset($argv[2]) ? $argv[2] : '';
if ($outputPath === '') {
    fwrite(STDERR, "No output file given.\n");
    exit(1);
}

// Opening the output truncates it, so writing to the input file itself
// would destroy the data before it has been read.
$resolvedInput = realpath($inputPath);
if ($resolvedInput !== false && $resolvedInput === realpath($outputPath)) {
    fwrite(STDERR, "Input and output file must be different.\n");
    exit(1);
}

// open input file (binary-safe, read only)
$fp = fopen($inputPath, 'rb');
if ($fp === false) {
    fwrite(STDERR, "Cannot open input file: {$inputPath}\n");
    exit(1);
}

// open/create output file
$fp2 = fopen($outputPath, 'wb');
if ($fp2 === false) {
    fclose($fp);
    fwrite(STDERR, "Cannot open output file: {$outputPath}\n");
    exit(1);
}

// Read the file piece by piece and fix the encoding of each piece.
// A piece may end in the middle of a damaged sequence, which would then go
// unrepaired. The trailing run of non-ASCII bytes (0x80..0xFF) is therefore
// held back and prepended to the next piece, so text is only ever cut right
// after an ASCII byte. The held-back part grows for as long as the input
// contains no ASCII byte at all.
$carry = '';
$failed = false;
while (!feof($fp)) {
    $chunk = fread($fp, 4096);
    if ($chunk === false) {
        fwrite(STDERR, "Error while reading: {$inputPath}\n");
        $failed = true;
        break;
    }

    $buffer = $carry . $chunk;
    $complete = rtrim($buffer, "\x80..\xFF");
    // The cast covers PHP versions where substr() returns false at the very end.
    $carry = (string) substr($buffer, strlen($complete));

    if ($complete !== '' && fwrite($fp2, fixMixedCharacters($complete)) === false) {
        fwrite(STDERR, "Error while writing: {$outputPath}\n");
        $failed = true;
        break;
    }
}

// Whatever is still held back at the end of the input is complete now.
if (!$failed && $carry !== '' && fwrite($fp2, fixMixedCharacters($carry)) === false) {
    fwrite(STDERR, "Error while writing: {$outputPath}\n");
    $failed = true;
}

fclose($fp);
fclose($fp2);

if ($failed) {
    exit(1);
}