From a8e1e50df38bb4c3e4596b6b33e9b8d91c1946ee Mon Sep 17 00:00:00 2001
From: L Prathyusha <83442712+Prathyusha-L@users.noreply.github.com>
Date: Wed, 18 Aug 2021 15:17:45 +0530
Subject: [PATCH 1/2] Create PR

---
 PR | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 PR

diff --git a/PR b/PR
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/PR
@@ -0,0 +1 @@
+

From 3bbfb53dddc1bf0464fea4907a29b9e04e22736b Mon Sep 17 00:00:00 2001
From: L Prathyusha <83442712+Prathyusha-L@users.noreply.github.com>
Date: Wed, 18 Aug 2021 15:41:30 +0530
Subject: [PATCH 2/2] Update and rename PR to Hidden websites

Before we move to the things that can make scraping tricky, let's
break down the process of web scraping into broad steps:

1. Visual inspection: figure out what to extract
2. Make an HTTP request to the webpage
3. Parse the HTTP response
4. Persist/utilize the relevant data
---
 Hidden websites | 29 +++++++++++++++++++++++++++++
 PR              |  1 -
 2 files changed, 29 insertions(+), 1 deletion(-)
 create mode 100644 Hidden websites
 delete mode 100644 PR

diff --git a/Hidden websites b/Hidden websites
new file mode 100644
index 0000000..a78f476
--- /dev/null
+++ b/Hidden websites
@@ -0,0 +1,29 @@
+import requests
+from bs4 import BeautifulSoup
+
+session = requests.Session()
+url = "https://github.com/login"
+username = 'Macuyiko'  # account we plan to log in as (not used yet)
+
+# visit the login page
+r = session.get(url)
+html_soup = BeautifulSoup(r.text, 'html.parser')
+
+# list every form found on the login page
+forms = html_soup.find_all('form')
+print(forms)
+
+# will give values for all the hidden inputs in the login page
+hidden_tags = html_soup.find_all("input", type="hidden")
+print(hidden_tags)
+
+# collect the hidden form fields (e.g. the CSRF authenticity token)
+data = {}
+for form in html_soup.find_all('form'):
+    # pull out the hidden form fields
+    for inp in form.select('input[type=hidden]'):
+        data[inp.get('name')] = inp.get('value')
+    print(data)
+
+
+
diff --git a/PR b/PR
deleted file mode 100644
index 8b13789..0000000
--- a/PR
+++ /dev/null
@@ -1 +0,0 @@
-
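
With the hidden fields collected, the one step from the list in the commit
message still missing is the authenticated HTTP request itself. Below is a
minimal sketch of that submission, assuming the GitHub login form posts to
https://github.com/session and names its visible fields 'login' and
'password'; the password value is a placeholder, and accounts with
two-factor authentication need an extra step, so treat this as an
illustration rather than a guaranteed working login:

    # reuse `session`, `username`, and the hidden-field dict `data` built above
    data['login'] = username
    data['password'] = 'your_password_here'  # placeholder; never commit real credentials
    r = session.post('https://github.com/session', data=data)
    print(r.status_code)  # 200 or 302 suggests the form was accepted

Because the POST goes through the same `session`, the cookies set during the
earlier GET (and any set by the login response) are carried along, which is
what lets later requests in the session act as the logged-in user.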