8
8
require "set"
9
9
require 'fileutils'
10
10
require 'yaml'
11
+ require 'json'
11
12
require 'diffy'
12
13
require_relative "version"
13
14
require_relative 'asciidoctor-extensions'
@@ -95,6 +96,86 @@ def extract_headings(html)
95
96
headings
96
97
end
97
98
99
# Builds a term => definition-HTML lookup from the rendered glossary page.
#
# Every <dt> containing an anchor whose id starts with "def_" is treated as a
# glossary term, and the <dd> that immediately follows it supplies the
# definition. In-page "#def_..." links inside a definition are rewritten to
# point at the glossary page (and to open in a new tab) because the
# definitions are going to be shown on a different page.
#
# @param content [String] glossary HTML, optionally preceded by a front matter
#   block delimited by "---" lines
# @param lang [String] language code; 'en' links to /docs/gitglossary, any
#   other value links to /docs/gitglossary/<lang>
# @return [Hash{String => String}] term name => definition HTML
# @raise [RuntimeError] if a term's <dt> is not directly followed by a <dd>
def extract_glossary_from_html(content, lang = 'en')
  # Skip front matter. The limit of 3 keeps any later "---" line as part of
  # the body instead of silently dropping everything after it.
  content = content.split(/^---$/, 3)[2] || content

  doc = Nokogiri::HTML::DocumentFragment.parse(content)

  # Fix up the links because they're going to be on a different page.
  # The target URL only depends on lang, so compute it once.
  glossary_url = lang == 'en' ? '/docs/gitglossary' : "/docs/gitglossary/#{lang}"

  glossary = {}

  doc.css('dt').each do |dt|
    # The selector already guarantees the id starts with "def_".
    next unless dt.at_css('a[id^="def_"]')

    term_name = dt.text.strip
    # hack to handle this one weird (also) thing: one entry, two spellings
    term_names =
      case term_name
      when 'tree-ish (also treeish)' then %w[tree-ish treeish]
      when 'arbre-esque (aussi arbresque)' then %w[arbre-esque arbresque]
      else [term_name]
      end

    dd = dt.next_element
    raise 'Expected dd' unless dd&.name == 'dd'

    # Re-parse the definition so the link rewriting happens on a copy and
    # leaves the source document untouched.
    definition_fragment = Nokogiri::HTML::DocumentFragment.parse(dd.inner_html.strip)
    definition_fragment.css('a[href^="#def_"]').each do |link|
      link['href'] = "#{glossary_url}#{link['href']}"
      link['target'] = '_blank'
    end
    definition = definition_fragment.to_html

    term_names.each { |term| glossary[term] = definition }
  end

  glossary
end
151
+
152
# Writes one pretty-printed JSON file per language to
# <SITE_ROOT>/static/js/glossary/<lang>.json, creating the directory if needed.
#
# @param glossary_data_by_lang [Hash] language code => glossary data (the
#   callers in this file store the term => definition hashes here)
# @return nothing meaningful; does nothing at all when the hash is empty
def save_glossary_files(glossary_data_by_lang)
  return if glossary_data_by_lang.empty?

  # File.join yields the same path whether or not SITE_ROOT ends in a slash.
  glossary_dir = File.join(SITE_ROOT.to_s, 'static', 'js', 'glossary')
  FileUtils.mkdir_p(glossary_dir)

  glossary_data_by_lang.each do |lang, glossary_data|
    output_file = File.join(glossary_dir, "#{lang}.json")
    puts " saving glossary data to #{output_file} (#{glossary_data.size} terms)"
    File.write(output_file, JSON.pretty_generate(glossary_data))
  end
end
164
+
165
# Wraps entity-escaped "<term>" placeholders that have an entry in the
# glossary for the given language in a <span class="hover-term"> element.
#
# @param html [String] page HTML to scan
# @param glossary_data_by_lang [Hash] language code => glossary hash keyed by term
# @param lang [String] language whose glossary is consulted; an unknown
#   language behaves like an empty glossary
# @return [String] a new string with the known terms wrapped; html is not modified
def mark_glossary_tooltips(html, glossary_data_by_lang, lang)
  current_glossary = glossary_data_by_lang[lang] || {}

  # Match only the escaped form (&lt;term&gt;) so that real HTML tags are
  # never rewritten; [^&]+ stops the capture at the closing entity.
  html.gsub(/&lt;([^&]+)&gt;/) do |match|
    term = Regexp.last_match(1)
    # Only mark terms that exist in the glossary
    if current_glossary.key?(term)
      "<span class=\"hover-term\" data-term=\"#{term}\">&lt;#{term}&gt;</span>"
    else
      match
    end
  end
end
178
+
98
179
def index_l10n_doc ( filter_tags , doc_list , get_content )
99
180
rebuild = ENV . fetch ( "REBUILD_DOC" , nil )
100
181
rerun = ENV [ "RERUN" ] || rebuild || false
@@ -139,8 +220,15 @@ def index_l10n_doc(filter_tags, doc_list, get_content)
139
220
end
140
221
141
222
check_paths = Set . new ( [ ] )
223
+ glossary_data_by_lang = { }
224
+
225
+ # Process glossary docs first so that we can use the parsed glossary to mark
226
+ # tooltip items in the other documents
227
+ glossary_docs = doc_files . select { |entry | File . basename ( entry [ 0 ] , ".#{ ext } " ) == 'gitglossary' }
228
+ other_docs = doc_files . reject { |entry | File . basename ( entry [ 0 ] , ".#{ ext } " ) == 'gitglossary' }
229
+ ordered_docs = glossary_docs + other_docs
142
230
143
- doc_files . each do |entry |
231
+ ordered_docs . each do |entry |
144
232
full_path , sha = entry
145
233
ids = Set . new ( [ ] )
146
234
lang = File . dirname ( full_path )
@@ -177,6 +265,12 @@ def index_l10n_doc(filter_tags, doc_list, get_content)
177
265
next if !rerun && lang_data [ lang ] == asciidoc_sha
178
266
179
267
html = asciidoc . render
268
+
269
+ if path == 'gitglossary'
270
+ glossary_data_by_lang [ lang ] = extract_glossary_from_html ( html , lang )
271
+ puts " extracted #{ glossary_data_by_lang [ lang ] . size } glossary terms for #{ lang } "
272
+ end
273
+
180
274
html . gsub! ( /linkgit:(\S +?)\[ (\d +)\] / ) do |line |
181
275
x = /^linkgit:(\S +?)\[ (\d +)\] / . match ( line )
182
276
relurl = "docs/#{ x [ 1 ] . gsub ( /-/ , '-' ) } /#{ lang } "
@@ -223,6 +317,8 @@ def index_l10n_doc(filter_tags, doc_list, get_content)
223
317
"#{ before } {{< relurl \" #{ after } \" >}}"
224
318
end
225
319
320
+ html = mark_glossary_tooltips ( html , glossary_data_by_lang , lang )
321
+
226
322
# Write <docname>/<lang>.html
227
323
front_matter = {
228
324
"category" => "manual" ,
@@ -248,6 +344,8 @@ def index_l10n_doc(filter_tags, doc_list, get_content)
248
344
lang_data [ lang ] = asciidoc_sha
249
345
end
250
346
347
+ save_glossary_files ( glossary_data_by_lang )
348
+
251
349
# In some cases, translations are not complete. As a consequence, some
252
350
# translated manual pages may point to other translated manual pages that do
253
351
# not exist. In these cases, redirect to the English version.
@@ -432,8 +530,15 @@ def index_doc(filter_tags, doc_list, get_content)
432
530
end
433
531
434
532
check_paths = Set . new ( [ ] )
533
+ glossary_data_by_lang = { }
435
534
436
- doc_files . each do |entry |
535
+ # Process glossary docs first so that we can use the parsed glossary to mark
536
+ # tooltip items in the other documents
537
+ glossary_docs = doc_files . select { |entry | File . basename ( entry [ 0 ] . sub ( /\. adoc$/ , '.txt' ) , '.txt' ) == 'gitglossary' }
538
+ other_docs = doc_files . reject { |entry | File . basename ( entry [ 0 ] . sub ( /\. adoc$/ , '.txt' ) , '.txt' ) == 'gitglossary' }
539
+ ordered_docs = glossary_docs + other_docs
540
+
541
+ ordered_docs . each do |entry |
437
542
path , sha = entry
438
543
txt_path = path . sub ( /\. adoc$/ , '.txt' )
439
544
ids = Set . new ( [ ] )
@@ -482,6 +587,12 @@ def index_doc(filter_tags, doc_list, get_content)
482
587
483
588
# Generate HTML
484
589
html = asciidoc . render
590
+
591
+ if docname == 'gitglossary'
592
+ glossary_data_by_lang [ 'en' ] = extract_glossary_from_html ( html , 'en' )
593
+ puts " extracted #{ glossary_data_by_lang [ 'en' ] . size } glossary terms for 'en'"
594
+ end
595
+
485
596
html . gsub! ( /linkgit:+(\S +?)\[ (\d +)\] / ) do |line |
486
597
x = /^linkgit:+(\S +?)\[ (\d +)\] / . match ( line )
487
598
if x [ 1 ] == "curl"
@@ -522,6 +633,8 @@ def index_doc(filter_tags, doc_list, get_content)
522
633
"#{ before } {{< relurl \" #{ after } \" >}}"
523
634
end
524
635
636
+ html = mark_glossary_tooltips ( html , glossary_data_by_lang , 'en' )
637
+
525
638
doc_versions = version_map . keys . sort { |a , b | Version . version_to_num ( a ) <=> Version . version_to_num ( b ) }
526
639
doc_version_index = doc_versions . index ( version )
527
640
@@ -640,6 +753,9 @@ def index_doc(filter_tags, doc_list, get_content)
640
753
end
641
754
end
642
755
end
756
+
757
+ save_glossary_files ( glossary_data_by_lang )
758
+
643
759
data [ "latest-version" ] = version if !data [ "latest-version" ] || Version . version_to_num ( data [ "latest-version" ] ) < Version . version_to_num ( version )
644
760
end
645
761
0 commit comments