|
31 | 31 | from numbers import Integral, Real |
32 | 32 | from frozendict import frozendict |
33 | 33 | from pyld.__about__ import (__copyright__, __license__, __version__) |
34 | | -from .iri_resolver import resolve |
| 34 | +from .iri_resolver import parse_url, resolve, unresolve |
35 | 35 |
|
def cmp(a, b):
    """
    Three-way comparison of two values.

    :param a: the first value.
    :param b: the second value.

    :return: the result of ``(a > b) - (a < b)``; for ordinary ordered
      values this is 1 if a > b, -1 if a < b and 0 otherwise.
    """
    is_greater = a > b
    is_less = a < b
    return is_greater - is_less
@@ -444,134 +444,6 @@ def unregister_rdf_parser(content_type): |
444 | 444 | del _rdf_parsers[content_type] |
445 | 445 |
|
446 | 446 |
|
def remove_base(base, iri):
    """
    Makes an absolute IRI relative to a base IRI where possible.

    :param base: the base IRI, or None to skip IRI processing.
    :param iri: the absolute IRI.

    :return: the relative IRI if relative to base, otherwise the absolute IRI.
    """
    # TODO: better sync with jsonld.js version
    # no base means no IRI processing
    if base is None:
        return iri

    base_parts = parse_url(base)
    iri_parts = parse_url(iri)

    # a differing scheme or authority means the IRI is left untouched
    if (base_parts.scheme != iri_parts.scheme or
            base_parts.authority != iri_parts.authority):
        return iri

    base_path = remove_dot_segments(base_parts.path).split('/')
    iri_path = remove_dot_segments(iri_parts.path).split('/')

    # the last IRI segment is only allowed to be consumed when a query or
    # fragment remains to identify the resource
    keep = 0 if (iri_parts.fragment or iri_parts.query) else 1

    # count the leading path segments shared by both paths
    shared = 0
    while (shared < len(base_path) and
            len(iri_path) - shared > keep and
            base_path[shared] == iri_path[shared]):
        shared += 1
    base_rest = base_path[shared:]
    iri_rest = iri_path[shared:]

    # one '../' per unmatched base segment, not counting the last one (if
    # the base ends with '/' the last segment is empty, otherwise it is not
    # a directory)
    if base_rest:
        climb = '../' * (len(base_rest) - 1)
    else:
        climb = ''

    relative_path = climb + '/'.join(iri_rest)

    relative = unparse_url(
        (None, None, relative_path, iri_parts.query, iri_parts.fragment))
    return relative or './'
490 | | - |
491 | | - |
def remove_dot_segments(path):
    """
    Removes dot segments from a URL path.

    :param path: the path to remove dot segments from.

    :return: a path with normalized dot segments.
    """

    # RFC 3986 5.2.4 (reworked)

    # empty path shortcut
    if not path:
        return ''

    segments = path.split('/')
    final_index = len(segments) - 1
    output = []

    # walk the segments by index rather than popping from the front of the
    # list, and avoid shadowing the `input` / `next` builtins
    for index, segment in enumerate(segments):
        if segment in ('.', '..'):
            # '..' discards the previously kept segment, if there is one
            if segment == '..' and output:
                output.pop()
            # a trailing dot segment must leave a trailing '/'
            if index == final_index:
                output.append('')
            continue

        output.append(segment)

    # ensure output has leading /
    # merge path segments from section 5.2.3
    # note that if the path includes no segments, the entire path is removed
    if output and path.startswith('/') and output[0] != '':
        output.insert(0, '')
    if output == ['']:
        return '/'

    return '/'.join(output)
539 | | - |
540 | | - |
# The five URL components captured by parse_url, in order of appearance.
ParsedUrl = namedtuple(
    'ParsedUrl', ['scheme', 'authority', 'path', 'query', 'fragment'])


def parse_url(url):
    """
    Splits a URL into its scheme, authority, path, query and fragment.

    Components that are absent from the URL are None, except the path,
    which is the empty string when absent. The default port is stripped
    from the authority of http (:80) and https (:443) URLs.

    :param url: the URL string to parse.

    :return: a ParsedUrl named tuple.
    """
    # regex from RFC 3986
    p = r'^(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?'
    m = re.match(p, url)
    g = list(m.groups())
    scheme, authority = g[0], g[1]
    # remove default http and https ports; the authority is None when the
    # URL has no '//' part (e.g. 'https:foo'), so guard before inspecting it
    if authority is not None and (
            (scheme == 'https' and authority.endswith(':443')) or
            (scheme == 'http' and authority.endswith(':80'))):
        g[1] = authority[:authority.rfind(':')]
    return ParsedUrl(*g)
555 | | - |
556 | | - |
def unparse_url(parsed):
    """
    Reassembles a URL string from its components.

    :param parsed: the URL components, given as a dict keyed by component
      name, a list or tuple in (scheme, authority, path, query, fragment)
      order, or any object exposing those five attributes.

    :return: the URL string.
    """
    if isinstance(parsed, dict):
        parsed = ParsedUrl(**parsed)
    elif isinstance(parsed, (list, tuple)):
        parsed = ParsedUrl(*parsed)

    pieces = []
    # a falsy scheme (None or '') is omitted entirely
    if parsed.scheme:
        pieces.append(parsed.scheme + ':')
    # an empty authority is still emitted as '//'; only None is skipped
    if parsed.authority is not None:
        pieces.append('//' + parsed.authority)
    pieces.append(parsed.path)
    if parsed.query is not None:
        pieces.append('?' + parsed.query)
    if parsed.fragment is not None:
        pieces.append('#' + parsed.fragment)
    return ''.join(pieces)
573 | | - |
574 | | - |
575 | 447 | class JsonLdProcessor(object): |
576 | 448 | """ |
577 | 449 | A JSON-LD processor. |
@@ -4814,9 +4686,9 @@ def _compact_iri( |
4814 | 4686 | if active_ctx['@base'] is None: |
4815 | 4687 | return iri |
4816 | 4688 | else: |
4817 | | - return remove_base(resolve(active_ctx['@base'], base), iri) |
| 4689 | + return unresolve(iri, resolve(active_ctx['@base'], base)) |
4818 | 4690 | else: |
4819 | | - return remove_base(base, iri) |
| 4691 | + return unresolve(iri, base) |
4820 | 4692 |
|
4821 | 4693 | # return IRI as is |
4822 | 4694 | return iri |
|
0 commit comments