Skip to content

Commit b9f3551

Browse files
adurand-cs and thbar authored
Ameliorations rochefort (#20)
* Non-distinction scolaire/non-scolaire * Exercices (part 1) * Ajout Timestamps (1er jet) + exo course * Suppression TODO * Liens autres parties * Fix broken HTML * Switch to markdown for collapse * Save reformatted code * Save reformatted code * Move file listing from DataTable to markdown list (try to be clearer) * Wip test for summariser * Compute summary * Format * Extract XmlSummariser module to file * Separate dedicated Saxy part before adding more there * Finalise tests * Add < > --------- Co-authored-by: Thibaut Barrère <thibaut.barrere@gmail.com>
1 parent a91aefc commit b9f3551

File tree

3 files changed

+442
-97
lines changed

3 files changed

+442
-97
lines changed

lib/test/wip_test.exs

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
# Self-contained test script (work in progress): dependencies are installed inline via Mix.install
2+
3+
Mix.install([
4+
{:unzip, "~> 0.12.0"},
5+
{:saxy, "~> 1.6"}
6+
])
7+
8+
ExUnit.start()
9+
Code.require_file("../zip_support.ex", __DIR__)
10+
Code.require_file("../xml_support.ex", __DIR__)
11+
Code.require_file("../xml_summariser.ex", __DIR__)
12+
Code.require_file("../helpers.ex", __DIR__)
13+
14+
defmodule Tests do
  # Work-in-progress tests for `XmlSummariser`, covering both entry points:
  # a raw XML binary and a stream of chunks read from a zip entry.
  use ExUnit.Case

  # Repository root, two levels above this file's directory.
  @folder_root Path.join(__DIR__, "../..")
  # Sample archive, resolved relative to @folder_root.
  @file_path "data/ca_rochefort_ocean-aggregated-netex-2025-03-03.zip"

  # Keeps the entries of `list` matching `regexp` and hands them to
  # `Helpers.one!/1` (defined in helpers.ex; presumably returns the single
  # remaining element and raises otherwise - confirm against that file).
  def one!(list, regexp) do
    list
    |> Enum.filter(&(&1 =~ regexp))
    |> Helpers.one!()
  end

  test "summarises XML binary" do
    content = "<Root><Child/><Child/><Other/><Child/></Root>"
    summary = XmlSummariser.summarise!(content)

    # All four children are at depth 2, where the summariser emits "(...)"
    # regardless of the element name, so they collapse into one "(x4)" line.
    # The expectation is an explicit literal rather than a heredoc: heredoc
    # content depends on source indentation, so the one-space indent emitted
    # for depth 2 is easily lost when the file is re-indented.
    assert summary == "<Root>\n (...) (x4)"
  end

  test "summarises XML stream" do
    # useful to ensure we don't break the unzip/iodata/binary dance
    zip =
      @file_path
      |> Path.expand(@folder_root)
      |> ZipSupport.open_zip!()

    filename =
      zip
      |> ZipSupport.list_zip_entries()
      |> one!(~r/commun/)

    output =
      zip
      |> Unzip.file_stream!(filename)
      |> Stream.map(&IO.iodata_to_binary/1)
      |> XmlSummariser.summarise!()

    assert output =~ ~r/\<Line\> \(x14\)/
  end
end

lib/xml_summariser.ex

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
# TODO: port to xmerl_sax_handler to reduce external dependencies
2+
# NOTE: for much larger docs later, this is the way https://github.com/qcam/saxy/pull/100
3+
defmodule XmlSummariser do
  @moduledoc """
  Produces a condensed, indented textual outline of an XML document.

  The document is parsed with `Saxy` and each opening tag contributes at most
  one output line, depending on its depth (the root element is depth 1):

    * depth 1, 4 and 5: the element name, rendered as `<Name>`
    * depth 2 and 3: the placeholder `(...)`
    * depth 6 and deeper: nothing

  Each line is indented by one space per ancestor element. Runs of identical
  consecutive lines are collapsed into a single line suffixed with ` (xN)`.
  """

  @doc """
  Summarises an XML document and returns the outline as a single string.

  Accepts either a binary holding the whole document, or an enumerable of
  binary chunks (a list, a `Stream`/`File.Stream` struct, or a function-based
  enumerable such as those built by `Stream.resource/3`).

  Raises `MatchError` if the parser does not return `{:ok, output}`.
  """
  # Works with file/unzip streams. The guard deliberately accepts any struct
  # and arity-2 functions: `File.stream!` returns a `File.Stream` struct and
  # `Stream.resource/3` returns a function, neither of which is a `Stream`
  # struct.
  def summarise!(stream)
      when is_struct(stream) or is_list(stream) or is_function(stream, 2) do
    {:ok, output} = Saxy.parse_stream(stream, XmlSummariser.Sax.Handler, [])

    output
  end

  # Raw binary version (useful for simple string passing)
  def summarise!(binary) when is_binary(binary) do
    summarise!([binary])
  end

  defmodule Sax.Handler do
    @moduledoc """
    A `Saxy` handler useful to provide a textual résumé of an XML document.

    The handler state is a map with two keys: `:level`, the number of
    currently open elements, and `:output`, the lines produced so far, stored
    most recent first and put back in document order at `:end_document`.
    """

    @behaviour Saxy.Handler

    @doc """
    Renders a run of identical lines: a single item is returned as is, a
    longer run becomes the item followed by ` (xN)` where N is the run length.
    """
    def format_line([item]), do: item
    def format_line([item | _tail] = items), do: "#{item} (x#{length(items)})"

    @impl true
    def handle_event(:start_document, _options, []) do
      {:ok, %{level: 0, output: []}}
    end

    def handle_event(:end_document, _options, state) do
      output =
        state.output
        # Lines were prepended while parsing; restore document order first.
        |> Enum.reverse()
        |> Enum.chunk_by(& &1)
        |> Enum.map_join("\n", &format_line/1)

      {:ok, output}
    end

    def handle_event(:start_element, {name, _attributes}, state) do
      # Indentation reflects the number of ancestors, i.e. the level before
      # this element is opened; the filtering below uses the element's own
      # depth (ancestors + 1).
      indent = String.duplicate(" ", state.level)
      level = state.level + 1

      # TODO: support customisation, this is currently hardcoded to presentation specific needs
      output =
        cond do
          level > 5 -> state.output
          level in 2..3 -> [indent <> "(...)" | state.output]
          true -> [indent <> "<" <> name <> ">" | state.output]
        end

      {:ok, %{state | level: level, output: output}}
    end

    def handle_event(:end_element, _, state) do
      {:ok, %{state | level: state.level - 1}}
    end

    # Any other event (e.g. character data) leaves the summary untouched.
    def handle_event(_, _, state), do: {:ok, state}
  end
end

0 commit comments

Comments
 (0)