@@ -11,7 +11,6 @@ defmodule DiffWeb.LiveView do
11
11
DiffWeb.TooLargeComponent . render ( % { file: file_path } )
12
12
|> Phoenix.HTML.Safe . to_iodata ( )
13
13
|> IO . iodata_to_binary ( )
14
- |> sanitize_utf8 ( )
15
14
16
15
{ :ok , % { "diff" => raw_diff , "path_from" => path_from , "path_to" => path_to } } ->
17
16
case GitDiff . parse_patch ( raw_diff , relative_from: path_from , relative_to: path_to ) do
@@ -38,42 +37,16 @@ defmodule DiffWeb.LiveView do
38
37
end
39
38
end
40
39
41
- defp sanitize_utf8 ( content ) when is_binary ( content ) do
42
- case String . valid? ( content ) do
43
- true ->
44
- content
45
-
46
- false ->
47
- # Multiple fallback strategies for invalid UTF-8
48
- sanitize_invalid_bytes ( content )
49
- end
50
- end
51
-
52
- defp sanitize_utf8 ( content ) , do: content
53
-
54
- defp sanitize_invalid_bytes ( content ) do
55
- # Try different encoding conversions and fallbacks
56
- cond do
57
- # Try converting from Latin-1/ISO-8859-1 encoding
58
- latin1_result = safe_unicode_convert ( content , :latin1 , :utf8 ) ->
59
- latin1_result
60
-
61
- # Last resort: replace invalid bytes with replacement character
62
- true ->
63
- content
64
- |> :binary . bin_to_list ( )
65
- # Replace high bytes with '?'
66
- |> Enum . map ( fn byte -> if byte > 127 , do: 63 , else: byte end )
67
- |> :binary . list_to_bin ( )
68
- end
69
- end
70
-
71
- defp safe_unicode_convert ( content , from , to ) do
72
- case :unicode . characters_to_binary ( content , from , to ) do
73
- result when is_binary ( result ) -> result
74
- _ -> nil
75
- end
76
- rescue
77
- _ -> nil
40
@doc """
Replaces every invalid UTF-8 byte in `content` with `"?"`.

The binary is split into alternating runs of valid and invalid UTF-8 with
`String.chunk/2`. Valid runs are kept untouched; each invalid run becomes one
`"?"` per byte, so the result has the same byte size as the input.

Only binaries are accepted (enforced by the guard).
"""
@spec sanitize_utf8(binary()) :: String.t()
def sanitize_utf8(content) when is_binary(content) do
  content
  |> String.chunk(:valid)
  # map_join/3 transforms and concatenates in one pass, without building the
  # intermediate list that map + join would.
  |> Enum.map_join(fn chunk ->
    if String.valid?(chunk), do: chunk, else: String.duplicate("?", byte_size(chunk))
  end)
end
79
52
end
0 commit comments