53 changes: 23 additions & 30 deletions dev/nextjournal/markdown/parser.cljc
@@ -19,12 +19,13 @@
;; - `:heading-level` specific of `:heading` nodes
;; - `:attrs` attributes as passed by markdown-it tokens (e.g `{:style "some style info"}`)
(ns nextjournal.markdown.parser
(:require [clojure.string :as str]
(:require #?@(:cljs [[applied-science.js-interop :as j]])
[clojure.string :as str]
[clojure.zip :as z]
[nextjournal.markdown :as md]
[nextjournal.markdown.impl.utils :as utils]
[nextjournal.markdown.transform :as md.transform]
[nextjournal.markdown.utils.emoji :as emoji]
#?@(:cljs [[applied-science.js-interop :as j]])))
[nextjournal.markdown.utils.emoji :as emoji]))

;; clj common accessors
(def get-in* #?(:clj get-in :cljs j/get-in))
@@ -193,8 +194,10 @@
(fn [node cs] (assoc node :content (vec cs)))
doc))

(defn assign-node-id+emoji [{:as doc ::keys [id->index path] :keys [text->id+emoji-fn]}]
(let [{:keys [id emoji]} (when (ifn? text->id+emoji-fn) (-> doc (get-in path) text->id+emoji-fn))
(defn assign-node-id+emoji [{:as doc ::keys [id->index path] :keys [opts]}]
(let [{:keys [text->id+emoji-fn]} opts
{:keys [id emoji]} (when (ifn? text->id+emoji-fn)
(-> doc (get-in path) text->id+emoji-fn))
id-count (when id (get id->index id))]
(cond-> doc
id
@@ -446,7 +449,7 @@ And what.
insert-sidenote-containers)

(-> empty-doc
(update :text-tokenizers (partial map normalize-tokenizer))
(update-in [:opts :text-tokenizers] (partial map normalize-tokenizer))
(apply-tokens (nextjournal.markdown/tokenize "what^[the heck]"))
insert-sidenote-columns
(apply-tokens (nextjournal.markdown/tokenize "# Hello"))
@@ -510,7 +513,7 @@

_this #should be a tag_, but this [_actually #foo shouldnt_](/bar/) is not."
nextjournal.markdown/tokenize
(parse (update empty-doc :text-tokenizers conj hashtag-tokenizer))))
(parse (update-in empty-doc [:opts :text-tokenizers] conj hashtag-tokenizer))))


(defn normalize-tokenizer
@@ -546,27 +549,27 @@
(conj (text-hnode remaining-text))))
[node])))

(defmethod apply-token "text" [{:as doc :keys [text-tokenizers]} {:keys [content]}]
(defmethod apply-token "text" [{:as doc :keys [opts]} {:keys [content]}]
(reduce (fn [doc {:as node :keys [doc-handler]}] (doc-handler doc (dissoc node :doc-handler)))
doc
(reduce (fn [nodes tokenizer]
(mapcat (fn [{:as node :keys [type]}]
(if (= :text type) (tokenize-text-node tokenizer doc node) [node]))
nodes))
[{:type :text :text content :doc-handler push-node}]
text-tokenizers)))
(:text-tokenizers opts))))

(comment
(def mustache (normalize-tokenizer {:regex #"\{\{([^\{]+)\}\}" :handler (fn [m] {:type :eval :text (m 1)})}))
(tokenize-text-node mustache {} {:text "{{what}} the {{hellow}}"})
(apply-token (assoc empty-doc :text-tokenizers [mustache])
(apply-token (assoc-in empty-doc [:opts :text-tokenizers] [mustache])
{:type "text" :content "foo [[bar]] dang #hashy taggy [[what]] #dangy foo [[great]] and {{eval}} me"})

(parse (assoc empty-doc
:text-tokenizers
[(normalize-tokenizer {:regex #"\{\{([^\{]+)\}\}"
:doc-handler (fn [{:as doc ::keys [path]} {[_ meta] :match}]
(update-in doc (ppop path) assoc :meta meta))})])
(parse (assoc-in empty-doc
[:opts :text-tokenizers]
[(normalize-tokenizer {:regex #"\{\{([^\{]+)\}\}"
:doc-handler (fn [{:as doc ::keys [path]} {[_ meta] :match}]
(update-in doc (ppop path) assoc :meta meta))})])
(nextjournal.markdown/tokenize "# Title {{id=heading}}
* one
* two")))
@@ -606,27 +609,17 @@
(let [mapify-attrs-xf (map (fn [x] (update* x :attrs pairs->kmap)))]
(reduce (mapify-attrs-xf apply-token) doc tokens)))

(def empty-doc {:type :doc
:content []
;; Id -> Nat, to disambiguate ids for nodes with the same textual content
::id->index {}
;; Node -> {id : String, emoji String}, dissoc from context to opt-out of ids
:text->id+emoji-fn (comp text->id+emoji md.transform/->text)
:toc {:type :toc}
:footnotes []
::path [:content -1] ;; private
:text-tokenizers []})
(def empty-doc utils/empty-doc)
Member Author: Why do we have the parser again? Seems to have quite a bit of code duplication and it's not really tested from what I can tell.

Contributor: This is in dev and I think no longer used?

(defn parse
"Takes a doc and a collection of markdown-it tokens, applies tokens to doc. Uses an emtpy doc in arity 1."
([tokens] (parse empty-doc tokens))
([doc tokens] (-> doc
(update :text-tokenizers (partial map normalize-tokenizer))
(update-in [:opts :text-tokenizers] (partial map normalize-tokenizer))
(apply-tokens tokens)
(dissoc ::path
::id->index
:text-tokenizers
:text->id+emoji-fn))))
(dissoc :opts
::path
::id->index))))
Member Author: Do we need to update this as well?

(comment

13 changes: 5 additions & 8 deletions src/nextjournal/markdown.cljc
@@ -20,16 +20,13 @@
(defn parse
"Turns the given `markdown-string` into an AST of nested clojure data.

Accepted `config` options:
Accepted `opts`:
- `:text-tokenizers`: customize parsing of text in leaf nodes (see https://nextjournal.github.io/markdown/notebooks/parsing_extensibility).
- `:disable-inline-formulas`: turn off parsing of $-delimited inline formulas.
"
- `:disable-inline-formulas`: turn off parsing of $-delimited inline formulas."
([markdown-string] (parse {} markdown-string))
([config markdown-string]
(-> (parse* config markdown-string)
(dissoc :disable-inline-formulas
:text-tokenizers
:text->id+emoji-fn
([opts markdown-string]
(-> (parse* {:opts opts} markdown-string)
(dissoc :opts
::impl/footnote-offset
::impl/id->index
::impl/label->footnote-ref
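For orientation, a minimal usage sketch of the reworked arities (the input strings are made up, and internal-link-tokenizer is simply one tokenizer known to live in nextjournal.markdown.impl.utils; nothing below is taken verbatim from this PR):

(require '[nextjournal.markdown :as md]
         '[nextjournal.markdown.impl.utils :as u])

;; 1-arity: defaults
(md/parse "# Hello *world*")

;; 2-arity: opts are passed as a plain map and nested under :opts before reaching parse*
(md/parse {:text-tokenizers [u/internal-link-tokenizer] ;; e.g. turn [[..]] into links
           :disable-inline-formulas true}               ;; leave $...$ untouched
          "a [[wiki link]] and no $formula$ parsing here")
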
6 changes: 3 additions & 3 deletions src/nextjournal/markdown/impl.clj
@@ -287,8 +287,8 @@
(set (keys ctx))
(set (keys u/empty-doc))))
;; only settings were provided, we add the empty doc
(recur (merge u/empty-doc ctx) md)
(node->data (update ctx :text-tokenizers (partial map u/normalize-tokenizer))
(recur (merge ctx (update u/empty-doc :opts merge (:opts ctx))) md)
(node->data (update-in ctx [:opts :text-tokenizers] (partial mapv u/normalize-tokenizer))
(.parse (parser ctx) md)))))

(comment
@@ -311,7 +311,7 @@
(parse "some para^[with other note]"))

(parse "some `marks` inline and inline $formula$ with a [link _with_ em](https://what.tfk)")
(parse (assoc u/empty-doc :text-tokenizers [u/internal-link-tokenizer])
(parse (assoc-in u/empty-doc [:opts :text-tokenizers] [u/internal-link-tokenizer])
"what a [[link]] is this")
(parse "what the <em>real</em> deal is")
(parse "some
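Worth unpacking: the ctx bootstrap in the hunk above merges in the empty doc when the caller supplied only settings, while letting the caller's nested :opts win over the defaults. A rough REPL sketch of what that merge yields (the :my-tokenizer value is a placeholder; u is assumed to alias nextjournal.markdown.impl.utils as elsewhere in this namespace):

(let [ctx {:opts {:text-tokenizers [:my-tokenizer]}}]
  (merge ctx (update u/empty-doc :opts merge (:opts ctx))))
;; => the full empty-doc, except :opts is now
;;    {:text-tokenizers [:my-tokenizer]
;;     :text->id+emoji-fn <default heading-id fn>}
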
13 changes: 7 additions & 6 deletions src/nextjournal/markdown/impl.cljs
@@ -170,7 +170,7 @@
#_ u/insert-sidenote-containers)

(-> empty-doc
(update :text-tokenizers (partial map u/normalize-tokenizer))
(update-in [:opts :text-tokenizers] (partial mapv u/normalize-tokenizer))
(apply-tokens (nextjournal.markdown/tokenize "what^[the heck]"))
insert-sidenote-columns
(apply-tokens (nextjournal.markdown/tokenize "# Hello"))
@@ -212,15 +212,15 @@
> what about #this

_this #should be a tag_, but this [_actually #foo shouldnt_](/bar/) is not."
(parse (update empty-doc :text-tokenizers conj (u/normalize-tokenizer u/hashtag-tokenizer)))))
(parse (update-in empty-doc [:opts :text-tokenizers] conj (u/normalize-tokenizer u/hashtag-tokenizer)))))

(defmethod apply-token "text" [ctx ^js token]
(u/handle-text-token ctx (.-content token)))

(comment
(def mustache (u/normalize-tokenizer {:regex #"\{\{([^\{]+)\}\}" :handler (fn [m] {:type :eval :text (m 1)})}))
(u/tokenize-text-node mustache {} {:text "{{what}} the {{hellow}}"})
(u/handle-text-token (assoc u/empty-doc :text-tokenizers [mustache])
(u/handle-text-token (assoc-in u/empty-doc [:opts :text-tokenizers] [mustache])
"foo [[bar]] dang #hashy taggy [[what]] #dangy foo [[great]] and {{eval}} me"))

;; inlines
@@ -280,15 +280,15 @@
(if (not (set/superset?
(set (keys ctx-in))
(set (keys u/empty-doc))))
(recur (merge u/empty-doc ctx-in) markdown)
(recur (merge ctx-in (update u/empty-doc :opts merge (:opts ctx-in))) markdown)
(let [{:as ctx-out :keys [doc title toc footnotes] ::keys [label->footnote-ref]}
(-> ctx-in
(assoc ::footnote-offset (count (::label->footnote-ref ctx-in)))
(update :text-tokenizers (partial map u/normalize-tokenizer))
(update-in [:opts :text-tokenizers] (partial mapv u/normalize-tokenizer))
(assoc :doc (u/->zip ctx-in)
:footnotes (u/->zip {:type :footnotes
:content (or (:footnotes ctx-in) [])}))
(apply-tokens (md/tokenize #js {:disable_inline_formulas (:disable-inline-formulas ctx-in)}
(apply-tokens (md/tokenize #js {:disable_inline_formulas (:disable-inline-formulas (:opts ctx-in))}
markdown)))]
(-> ctx-out
(dissoc :doc)
@@ -365,3 +365,4 @@ some final par"
(mapcat (partial tree-seq (comp seq :children) :children))
(map #(select-keys % [:type :content :hidden :level :info :meta])))
tokens))

2 changes: 1 addition & 1 deletion src/nextjournal/markdown/impl/extensions.clj
@@ -103,7 +103,7 @@
(extend [^Parser$Builder pb]
(.customBlockParserFactory pb block-toc-parser-factory)
(.customBlockParserFactory pb block-formula-parser-factory)
(when-not (:disable-inline-formulas ctx)
(when-not (:disable-inline-formulas (:opts ctx))
(.customInlineContentParserFactory pb (reify InlineContentParserFactory
(getTriggerCharacters [_] #{\$})
(create [_] (inline-formula-parser))))))))
15 changes: 8 additions & 7 deletions src/nextjournal/markdown/impl/utils.cljc
@@ -51,9 +51,9 @@
:content []
:toc {:type :toc}
:footnotes []
:text-tokenizers []
;; Node -> {id : String, emoji String}, dissoc from context to opt-out of ids
:text->id+emoji-fn (comp text->id+emoji md.transform/->text)
:opts {:text-tokenizers []
;; Node -> {id : String, emoji String}, dissoc from context to opt-out of ids
:text->id+emoji-fn (comp text->id+emoji md.transform/->text)}

;; private
;; Id -> Nat, to disambiguate ids for nodes with the same textual content
@@ -162,7 +162,7 @@
(reduce (xf rf) (assoc doc :toc {:type :toc}) content)))

(defn handle-close-heading [ctx]
(let [{:keys [text->id+emoji-fn] :nextjournal.markdown.impl/keys [id->index]} ctx
(let [{:keys [opts] :nextjournal.markdown.impl/keys [id->index]} ctx
{:keys [text->id+emoji-fn]} opts
heading-loc (current-loc ctx)
heading (z/node heading-loc)
{:keys [id emoji]} (when (ifn? text->id+emoji-fn)
@@ -244,7 +245,7 @@ end"
(-> acc
(update :remaining-text subs 0 start)
(cond->
(< end (count remaining-text))
(< end (count remaining-text))
(update :nodes conj (text-hnode (subs remaining-text end))))
(update :nodes conj {:doc-handler doc-handler
:match match :text text
@@ -256,7 +257,7 @@
(conj (text-hnode remaining-text))))
[node])))

(defn handle-text-token [{:as ctx :keys [text-tokenizers]} text]
(defn handle-text-token [{:as ctx :keys [opts]} text]
(reduce (fn [ctx {:as node :keys [doc-handler]}] (update-current-loc ctx doc-handler (dissoc node :doc-handler)))
ctx
(reduce (fn [nodes tokenizer]
Expand All @@ -266,7 +267,7 @@ end"
(cond-> []
(not (empty? text))
(conj {:type :text :text text :doc-handler z/append-child}))
text-tokenizers)))
(:text-tokenizers opts))))

;; clj
#_(handle-text-token (->zip {:type :doc :content []}) "some-text")
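A side note on the nested :opts: handle-close-heading only assigns ids when text->id+emoji-fn satisfies ifn?, so a caller could presumably opt out of heading ids by overriding that key. A hedged sketch (the nil override is an assumption, not something exercised in this PR):

;; nil is not ifn?, so the (when (ifn? text->id+emoji-fn) ...) guard short-circuits
;; and headings should come back without :id/:emoji
(nextjournal.markdown/parse {:text->id+emoji-fn nil}
                            "# A heading, presumably without an id")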