Skip to content

Ameliorations rochefort #20

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 18 commits into from
Apr 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 56 additions & 0 deletions lib/test/wip_test.exs
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# self-contained tests embryo
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tests qui m'ont permis de mettre au point le petit outil de résumé XML.

elixir lib/test/wip_test.exs pour lancer.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(ça me permet aussi de vérifier si on n'a pas trop cassé de code à côté).


# Script-level dependency installation: lets this file run standalone
# (outside a Mix project) with the two hex packages it needs.
Mix.install([
# Used below through Unzip.file_stream!/2 to read an entry out of a zip archive.
{:unzip, "~> 0.12.0"},
# SAX parser that XmlSummariser delegates to (Saxy.parse_stream).
{:saxy, "~> 1.6"}
])

# Start the test framework before the test module below is defined.
ExUnit.start()
# Load the project modules under test, resolved relative to this file's directory.
# ZipSupport, XmlSummariser and Helpers are called by the tests below;
# xml_support.ex is not referenced directly by the visible tests
# (presumably a dependency of one of the other files — TODO confirm).
Code.require_file("../zip_support.ex", __DIR__)
Code.require_file("../xml_support.ex", __DIR__)
Code.require_file("../xml_summariser.ex", __DIR__)
Code.require_file("../helpers.ex", __DIR__)

# Self-contained ExUnit tests for XmlSummariser: one on an in-memory binary,
# one on a stream read from a zipped NeTEx archive.
defmodule Tests do
use ExUnit.Case

# Repository root, two directories above this test file.
@folder_root Path.join(__DIR__, "../..")
# Zip archive used by the stream test, relative to @folder_root.
@file_path "data/ca_rochefort_ocean-aggregated-netex-2025-03-03.zip"

# Keeps the entries of `list` matching `regexp` and hands them to Helpers.one!/1
# (presumably returns the single match and raises otherwise — confirm in helpers.ex).
def one!(list, regexp) do
list
|> Enum.filter(&(&1 =~ regexp))
|> Helpers.one!()
end

test "summarises XML binary" do
# Four direct children of <Root>; the summariser prints each of them as the same
# "(...)" placeholder line, so the four consecutive lines collapse into one "(x4)" line.
content = "<Root><Child/><Child/><Other/><Child/></Root>"
summary = XmlSummariser.summarise!(content)

# The trailing backslash suppresses the heredoc's final newline, since the summary
# is joined with "\n" and has no trailing newline.
# NOTE(review): the heredoc's indentation looks lost in this view; the expected child
# line presumably carries the handler's indent — confirm against the original file.
assert summary == """
<Root>
(...) (x4)\
"""
end

test "summarises XML stream" do
# useful to ensure we don't break the unzip/iodata/binary dance
zip =
@file_path
|> Path.expand(@folder_root)
|> ZipSupport.open_zip!()

# Pick the single archive entry whose name contains "commun".
filename =
zip
|> ZipSupport.list_zip_entries()
|> one!(~r/commun/)

# Each streamed chunk is converted from iodata to a binary before parsing;
# wrapping with Stream.map/2 also yields a %Stream{} struct, which is what
# XmlSummariser.summarise!/1's guard accepts.
output =
zip
|> Unzip.file_stream!(filename)
|> Stream.map(&IO.iodata_to_binary(&1))
|> XmlSummariser.summarise!()

# The summary must contain a "<Line>" line repeated 14 times in a row.
assert output =~ ~r/\<Line\> \(x14\)/
end
end
70 changes: 70 additions & 0 deletions lib/xml_summariser.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
# TODO: port to xmerl_sax_handler to reduce external dependencies
# NOTE: for much larger docs later, this is the way https://github.com/qcam/saxy/pull/100
defmodule XmlSummariser do
  @moduledoc """
  Produces a short, indented textual outline of an XML document.

  Only element names are considered (attributes and text are ignored), and
  runs of identical consecutive lines are collapsed into `line (xN)`.
  """

  @doc """
  Returns the textual summary of an XML document.

  Accepts either:

    * a `Stream` struct or a list of binary chunks, parsed incrementally
      with `Saxy.parse_stream/3`;
    * a raw binary, which is wrapped in a single-element list and parsed
      the same way.

  Raises a `MatchError` when the parser does not return `{:ok, output}`,
  and a `FunctionClauseError` for any other kind of argument.
  """
  @spec summarise!(Enumerable.t() | binary()) :: String.t()
  def summarise!(stream) when is_struct(stream, Stream) or is_list(stream) do
    {:ok, output} = Saxy.parse_stream(stream, XmlSummariser.Sax.Handler, [])

    output
  end

  # Raw binary version (useful for simple string passing)
  def summarise!(binary) when is_binary(binary) do
    summarise!([binary])
  end

  defmodule Sax.Handler do
    @moduledoc """
    A `Saxy` handler useful to provide a textual résumé of an XML document.

    The handler state is a map with:

      * `:level` - the number of currently open elements;
      * `:output` - the summary lines collected so far, most recent first
        (they are put back in document order at `:end_document`).

    Which elements are shown is currently hard-coded on the element depth
    (the root element has depth 1):

      * depth 1, 4 and 5: printed as `<name>`;
      * depth 2 and 3: printed as the placeholder `(...)`;
      * depth 6 and deeper: omitted.
    """

    @behaviour Saxy.Handler

    @doc """
    Formats a chunk of identical consecutive lines.

    A single line is returned unchanged; several are rendered as
    `line (xN)` where `N` is the number of repetitions.
    """
    def format_line([item]), do: item
    def format_line(items = [item | _tail]), do: "#{item} (x#{length(items)})"

    @impl Saxy.Handler
    def handle_event(:start_document, _options, []) do
      {:ok, %{level: 0, output: []}}
    end

    def handle_event(:end_document, _options, state) do
      summary =
        state.output
        # lines were prepended while parsing: restore document order first
        |> Enum.reverse()
        |> Enum.chunk_by(& &1)
        |> Enum.map_join("\n", &format_line/1)

      {:ok, summary}
    end

    def handle_event(:start_element, {name, _attributes}, state) do
      # The indent reflects the number of ancestors, i.e. the level *before*
      # this element is opened, while the filtering below is based on the
      # element's own depth (level *after* opening).
      indent = String.duplicate(" ", state.level)
      depth = state.level + 1

      # TODO: support customisation, this is currently hardcoded to presentation specific needs
      output =
        cond do
          depth > 5 -> state.output
          # prepend (O(1)) instead of `++` (O(n)) to stay linear on large documents
          depth in 2..3 -> [indent <> "(...)" | state.output]
          true -> [indent <> "<" <> name <> ">" | state.output]
        end

      {:ok, %{state | level: depth, output: output}}
    end

    def handle_event(:end_element, _name, state) do
      {:ok, %{state | level: state.level - 1}}
    end

    # Any other event (characters, CDATA, ...) leaves the state untouched.
    def handle_event(_, _, state), do: {:ok, state}
  end
end
Loading