Skip to content

Commit 4962e43

Browse files
committed
Transform/DDG: Pass desc building parameters from command line
1 parent cab9de2 commit 4962e43

File tree

2 files changed

+28
-14
lines changed

2 files changed

+28
-14
lines changed

ddg_parse_html.py

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -262,8 +262,8 @@ def on_ie_text(pos, match_text):
262262
parenthesized text. If the size of a parenthesized block exceeds that, it is
263263
removed. Such blocks within <code>, <b> or <i> tags are ignored.
264264
'''
265-
def process_description(el, max_sentences=1, max_chars=200,
266-
max_paren_text_size=40, debug=False):
265+
def process_description(el, max_sentences, max_chars,
266+
max_paren_text_size, debug=False):
267267

268268
el = deepcopy(el) # we'll modify the tree
269269
el.tag = 'root'
@@ -332,7 +332,8 @@ def process_description(el, max_sentences=1, max_chars=200,
332332
333333
Raises DdgException on error
334334
'''
335-
def get_short_description(root_el, num, debug=False):
335+
def get_short_description(root_el, num, max_sentences=1, max_chars=200,
336+
max_paren_text_size=40, debug=False):
336337

337338
content_el = get_content_el(root_el)
338339

@@ -349,7 +350,8 @@ def get_short_description(root_el, num, debug=False):
349350
raise DdgException("No elements after dcl table")
350351

351352
if desc_el.tag == 'p':
352-
return process_description(desc_el, debug=debug)
353+
return process_description(desc_el, max_sentences, max_chars,
354+
max_paren_text_size, debug=debug)
353355
elif desc_el.tag == 'div' and desc_el.get('class') == 't-li1':
354356
if num == None:
355357
raise DdgException("Versioned summary with no version supplied")
@@ -367,17 +369,20 @@ def get_short_description(root_el, num, debug=False):
367369
m = re.match('^\s*(\d+)\)\s*$', index)
368370
if m and int(m.group(1)) == num:
369371
index_el.drop_tree()
370-
return process_description(desc_el, debug=debug)
372+
return process_description(desc_el, max_sentences, max_chars,
373+
max_paren_text_size, debug=debug)
371374

372375
m = re.match('^\s*(\d+)-(\d+)\)\s*$', index)
373376
if m and int(m.group(1)) <= num and int(m.group(2)) >= num:
374377
index_el.drop_tree()
375-
return process_description(desc_el, debug=debug)
378+
return process_description(desc_el, max_sentences, max_chars,
379+
max_paren_text_size, debug=debug)
376380

377381
m = re.match('^\s*(\d+),(\d+)\)\s*$', index)
378382
if m and num in [int(m.group(1)), int(m.group(2))]:
379383
index_el.drop_tree()
380-
return process_description(desc_el, debug=debug)
384+
return process_description(desc_el, max_sentences, max_chars,
385+
max_paren_text_size, debug=debug)
381386

382387
desc_el = desc_el.getnext()
383388
raise DdgException("List items are not numbered")

index2ddg.py

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -376,23 +376,25 @@ def output_redirects(out, redirects):
376376
out.write(line)
377377

378378
def process_identifier(out, redirects, root, link, item_ident, item_type,
379-
max_code_lines, debug=DDGDebug()):
379+
opts, debug=DDGDebug()):
380380
# get the name by extracting the unqualified identifier
381381
name = get_name(item_ident)
382382
debug_verbose = True if debug.enabled and debug.ident_match is not None else False
383383

384384
try:
385385
if item_type == ITEM_TYPE_CLASS:
386386
decls = get_declarations(root, name)
387-
desc = get_short_description(root, get_version(decls), debug=debug_verbose)
388-
abstract = build_abstract(decls, desc, max_code_lines, debug=debug)
387+
desc = get_short_description(root, get_version(decls), opts.max_sentences, opts.max_characters,
388+
opts.max_paren_chars, debug=debug_verbose)
389+
abstract = build_abstract(decls, desc, opts.max_code_lines, debug=debug)
389390

390391
elif item_type in [ ITEM_TYPE_FUNCTION,
391392
ITEM_TYPE_CONSTRUCTOR,
392393
ITEM_TYPE_DESTRUCTOR ]:
393394
decls = get_declarations(root, name)
394-
desc = get_short_description(root, get_version(decls), debug=debug_verbose)
395-
abstract = build_abstract(decls, desc, max_code_lines, debug=debug)
395+
desc = get_short_description(root, get_version(decls), opts.max_sentences, opts.max_characters,
396+
opts.max_paren_chars, debug=debug_verbose)
397+
abstract = build_abstract(decls, desc, opts.max_code_lines, debug=debug)
396398

397399
elif item_type in [ ITEM_TYPE_FUNCTION_INLINEMEM,
398400
ITEM_TYPE_CONSTRUCTOR_INLINEMEM,
@@ -458,6 +460,14 @@ def main():
458460
help='The path to destination output.txt file')
459461
parser.add_argument('--max_code_lines', type=int, default=6,
460462
help='Maximum number of lines of code to show in abstract')
463+
parser.add_argument('--max_sentences', type=int, default=1,
464+
help='Maximum number of sentences to use for the description')
465+
parser.add_argument('--max_characters', type=int, default=200,
466+
help='Maximum number of characters to use for the description')
467+
parser.add_argument('--max_paren_chars', type=int, default=40,
468+
help='Maximum size of parenthesized text in the description. '+
469+
'Parenthesized chunks longer than that is removed, unless '+
470+
'they are within <code>, <b> or <i> tags')
461471
parser.add_argument('--debug', action='store_true', default=False,
462472
help='Enables debug mode.')
463473
parser.add_argument('--debug_ident', type=str, default=None,
@@ -474,7 +484,6 @@ def main():
474484

475485
index_file = args.index
476486
output_file = args.output
477-
max_code_lines = args.max_code_lines
478487

479488
# a map that stores information about location and type of identifiers
480489
# it's two level map: full_link maps to a dict that has full_name map to
@@ -529,7 +538,7 @@ def main():
529538
item_type = ident['type']
530539

531540
process_identifier(out, redirects, root, link, item_ident, item_type,
532-
max_code_lines, debug=debug)
541+
args, debug=debug)
533542

534543
output_redirects(out, redirects)
535544

0 commit comments

Comments
 (0)