PinterestScraper/TitleParser.py at master · RileyCullen/PinterestScraper · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# Cullen, Riley
# TitleParser.py
# Created on May 15, 2020

# Revision History:
#   May 15, 2020:
#       1). GetTitle defined and implemented

# TODO: If empty title, pass N/A

import bs4, requests

# desc: This function goes to a user specified url and gets that website's title
#
# Parameters:
# ------------
# url - string
#       Holds the URL that we want to get the title from
#
# Returns:
# ------------
# string
#       The text inside the page's <title> tag, or "N/A" when the request
#       fails, the page has no <title> tag, or the title is empty/blank
def GetTitle(url):
    title = "N/A"
    userAgent = {'User-agent': 'Mozilla/5.0'}

    # The request itself lives inside the try so that connection errors,
    # invalid URLs and timeouts are handled here rather than crashing the
    # caller. The timeout keeps one dead server from stalling the scraper.
    try:
        requestsObject = requests.get(url, headers = userAgent, timeout = 10)
        requestsObject.raise_for_status()
    except requests.exceptions.HTTPError:
        # Best effort: a 4xx/5xx response simply means no usable title.
        return title
    except requests.exceptions.RequestException as exc:
        print(exc)
        return title

    soupObj = bs4.BeautifulSoup(requestsObject.text, "html.parser")

    # find() returns None when the page has no <title> tag at all.
    titleTag = soupObj.find("title")
    if titleTag is None:
        return title

    # get_text() is used instead of .string because .string is None for an
    # empty tag or a tag with nested markup, which would break the check below.
    titleText = titleTag.get_text()
    if titleText.strip():
        title = titleText

    return title