diff --git a/source_code/1-1-urllib.py b/source_code/1-1-urllib.py index 3820cb7..6d2fcba 100644 --- a/source_code/1-1-urllib.py +++ b/source_code/1-1-urllib.py @@ -1,6 +1,6 @@ from urllib.request import urlopen -# if has Chinese, apply decode() +# if the page contains Indian-language text, apply decode() html = urlopen("https://mofanpy.com/static/scraping/basic-structure.html").read().decode('utf-8') print(html) @@ -20,4 +20,4 @@ res = re.findall(r'href="(.*?)"', html) print("\nAll links: ", res) -# All links: ['https://mofanpy.com/static/img/description/tab_icon.png', 'https://mofanpy.com/', 'https://mofanpy.com/tutorials/scraping'] \ No newline at end of file +# All links: ['https://mofanpy.com/static/img/description/tab_icon.png', 'https://mofanpy.com/', 'https://mofanpy.com/tutorials/scraping']