-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathscraper.py
More file actions
96 lines (82 loc) · 3.22 KB
/
scraper.py
File metadata and controls
96 lines (82 loc) · 3.22 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
# Cullen, Riley
# scraper.py
# Created on 5/5/2020
# Revision History
# May 5, 2020:
# 1). Main() and RunScraper defined and implemented
# May 8, 2020:
# 1). PrintCommandList() defined and implemented
# May 12, 2020:
# 1). GetPassword() defined and implemented
# 2). User no longer enters password when starting program. Passwords are
# entered after the program begins
# 3). GetEmail() defined and implemented
# May 13, 2020:
# 1). Function documentation updated
# May 16, 2020:
# 1). RunScraper updated so user only has to enter keyword instead of having
# to enter keyword and directory (this is usually the same)
# TODO
# 1. Updated scraper so the user can enter root directory from shell
import sys, PinterestScraper, requests, bs4, csv, os, getpass, CSVHelper
def Main():
    """Validate the command line, read the password, and start the shell.

    Exactly one command-line argument (the e-mail) is expected after the
    script name. With any other argument count a usage message is printed
    and nothing else happens.
    """
    argCount = len(sys.argv)
    if argCount != 2:
        print('invalid arguments... python3 scraper.py [email] required')
        return

    # The password is asked for interactively rather than taken from argv.
    RunScraper(GetPassword())
# desc: Main loop for scraper shell
#
# Parameters:
# ------------
# password : string
# Holds the user entered password
def RunScraper(password):
    """Run the interactive scraper shell until the user quits.

    A PinterestScraper is built from sys.argv[1] and `password`. While its
    GetLoginStatus() reports False the user is re-prompted for credentials
    and Login() is retried. After that, commands are read from the
    '[Pinterest_Scraper] $ ' prompt and dispatched:

        quit                 leave the shell
        scrape               prompt for a keyword and a page URL, then call
                             GetLinkSet() followed by ScrapeLinkset()
        help                 print the command list
        create master csv    call CSVHelper.CreateMasterCSV(root, 'master.csv')
        set root directory   prompt for a root and hand it to SetRoot()
        set image bounds     prompt for two minimums and hand them to
                             SetBounds()

    Input is split on any run of whitespace, so leading, trailing, or
    repeated spaces do not prevent a command from being recognised. Blank
    lines are ignored, unrecognised or malformed commands are reported, and
    end-of-input on stdin ends the shell like 'quit'.

    Parameters:
    ------------
    password : string
        Password passed to the PinterestScraper constructor together with
        sys.argv[1].
    """
    pinObj = PinterestScraper.PinterestScraper(sys.argv[1], password)

    # GetLoginStatus() is defined elsewhere; the explicit `== False` is kept
    # (instead of `not ...`) so that a None return still ends the loop.
    while pinObj.GetLoginStatus() == False:  # noqa: E712
        email = GetEmail()
        password = GetPassword()
        pinObj.Login(email, password)

    print('Root directory: ' + pinObj.GetRoot() + '\n')

    isRunning = True
    while isRunning:
        try:
            usrInput = input('[Pinterest_Scraper] $ ')
        except EOFError:
            # stdin was closed (Ctrl-D / end of a piped script): finish the
            # prompt line and leave instead of dying with a traceback.
            print()
            break

        # split() with no separator collapses whitespace runs and never
        # yields empty tokens; an all-blank line gives an empty list.
        tokens = usrInput.split()
        if not tokens:
            continue

        command = tokens[0]
        args = tokens[1:]

        if command == 'quit':
            isRunning = False
        elif command == 'scrape':
            keyword = input('Keyword: ')
            linkSetURL = input('What pinterest page do you wanna scrape? ')
            pinObj.GetLinkSet(linkSetURL, keyword)
            pinObj.ScrapeLinkset()
        elif command == 'help':
            PrintCommandList()
        elif command == 'create':
            if args == ['master', 'csv']:
                print('root:%s' % pinObj.GetRoot())
                CSVHelper.CreateMasterCSV(pinObj.GetRoot(), 'master.csv')
            else:
                print('Usage: create master csv')
        elif command == 'set':
            if args == ['root', 'directory']:
                root = input('root: ')
                if not pinObj.SetRoot(root):
                    print('Invalid root. Root could not be set!\n')
            elif args == ['image', 'bounds']:
                # Values are forwarded exactly as typed (strings); any
                # conversion or validation is left to SetBounds().
                hMin = input('horizontal min: ')
                vMin = input('vertical min: ')
                if not pinObj.SetBounds(hMin, vMin):
                    print('Invalid bounds. Bounds could not be set!\n')
            else:
                print('Usage: set root directory | set image bounds')
        else:
            print("Unknown command '%s'. Type 'help' for the command list."
                  % command)
# desc: Prints out the currently supported commands
def PrintCommandList():
    """Print the commands accepted at the scraper shell prompt.

    The list mirrors the commands dispatched by the shell loop: scrape,
    quit, help, create master csv, set root directory and set image bounds.
    The output starts with a blank line and is followed by two blank lines.
    """
    print('\nCommands:\nscrape - runs Pinterest Scraper\nquit - Terminates program'
          '\nhelp - Prints this command list'
          '\ncreate master csv - Creates master.csv for the root directory'
          '\nset root directory - Prompts for a new root directory'
          '\nset image bounds - Prompts for the horizontal and vertical minimums')
    print('\n')
# desc: Receives and returns the user's password
def GetPassword():
    """Ask for the password with the 'Password: ' prompt and return it.

    The value is read through getpass.getpass.
    """
    typedPassword = getpass.getpass('Password: ')
    return typedPassword
# desc: Receives and returns the user's email
def GetEmail():
    """Ask for the user's e-mail and return the line that was typed.

    The on-screen prompt reads 'Username: '.
    """
    typedEmail = input('Username: ')
    return typedEmail
# Start the shell only when this file is executed as a script, not when it
# is imported as a module.
if "__main__" == __name__:
    Main()