33- The 'unresolve()' function is a move and rename of the 'remove_base()' function from 'jsonld.py'
44"""
55
6- from collections import namedtuple
7- import re
6+ from urllib .parse import urlparse , urlunparse
87
98
109def is_character_allowed_after_relative_path_segment (ch : str ) -> bool :
@@ -220,20 +219,24 @@ def unresolve(absolute_iri: str, base_iri: str = ""):
220219
221220 :return: the relative IRI if relative to base, otherwise the absolute IRI.
222221 """
223- # TODO: better sync with jsonld.js version
224222 # skip IRI processing
225223 if not base_iri :
226224 return absolute_iri
227225
228- base = parse_url (base_iri )
226+ base = urlparse (base_iri )
229227
230228 if not base .scheme :
231229 raise ValueError (f"Found invalid baseIRI '{ base_iri } ' for value '{ absolute_iri } '" )
232-
233- rel = parse_url (absolute_iri )
230+
231+ # compute authority (netloc) and strip default ports
232+ base_authority = parse_authority (base )
233+
234+ rel = urlparse (absolute_iri )
235+ # compute authority (netloc) and strip default ports
236+ rel_authority = parse_authority (rel )
234237
235238 # schemes and network locations (authorities) don't match, don't alter IRI
236- if not (base .scheme == rel .scheme and base . authority == rel . authority ):
239+ if not (base .scheme == rel .scheme and base_authority == rel_authority ):
237240 return absolute_iri
238241
239242 # remove path segments that match (do not remove last segment unless there
@@ -257,36 +260,25 @@ def unresolve(absolute_iri: str, base_iri: str = ""):
257260 # prepend remaining segments
258261 rval += '/' .join (iri_segments )
259262
260- return unparse_url ((None , None , rval , rel .query , rel .fragment )) or './'
261-
262- ParsedUrl = namedtuple (
263- 'ParsedUrl' , ['scheme' , 'authority' , 'path' , 'query' , 'fragment' ])
263+ # build relative IRI using urlunparse with empty scheme/netloc
264+ return urlunparse (('' , '' , rval , '' , rel .query or '' , rel .fragment or '' )) or './'
264265
265- def parse_url (url ):
266- # regex from RFC 3986
267- p = r'^(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?'
268- m = re .match (p , url )
269- # remove default http and https ports
270- g = list (m .groups ())
266+ def parse_authority (parsed_iri ) -> str :
267+ """
268+ Compute authority (netloc) and strip default ports
271269
272- if g [1 ] is not None and ((g [0 ] == 'https' and g [1 ].endswith (':443' )) or
273- (g [0 ] == 'http' and g [1 ].endswith (':80' ))):
274- g [1 ] = g [1 ][:g [1 ].rfind (':' )]
275- return ParsedUrl (* g )
276-
277- def unparse_url (parsed ):
278- if isinstance (parsed , dict ):
279- parsed = ParsedUrl (** parsed )
280- elif isinstance (parsed , list ) or isinstance (parsed , tuple ):
281- parsed = ParsedUrl (* parsed )
282- rval = ''
283- if parsed .scheme :
284- rval += parsed .scheme + ':'
285- if parsed .authority is not None :
286- rval += '//' + parsed .authority
287- rval += parsed .path
288- if parsed .query is not None :
289- rval += '?' + parsed .query
290- if parsed .fragment is not None :
291- rval += '#' + parsed .fragment
292- return rval
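270+ :param parsed_iri: parsed IRI as returned by urlparse(); its scheme, netloc and port are read
271+ :return: the netloc with a default port (80 for 'http', 443 for 'https') stripped, or None if the netloc is empty
272+ :rtype: str or None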
273+ """
274+ base_authority = parsed_iri .netloc or None
275+
276+ try :
277+ base_port = parsed_iri .port
278+ except Exception :
279+ base_port = None
280+
281+ if base_authority is not None and base_port is not None :
282+ if (parsed_iri .scheme == 'https' and base_port == 443 ) or (parsed_iri .scheme == 'http' and base_port == 80 ):
283+ base_authority = base_authority .rsplit (':' , 1 )[0 ]
284+ return base_authority
0 commit comments