-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcrawler.py
More file actions
77 lines (58 loc) · 2.11 KB
/
crawler.py
File metadata and controls
77 lines (58 loc) · 2.11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import httpx
from bs4 import BeautifulSoup
import time
# All-campus notices
def crawl_notices(count=5):
    """Fetch the latest notice titles from the Gachon University notice board.

    Args:
        count: Maximum number of titles to return (default 5, matching the
            original behavior).

    Returns:
        list[str]: Stripped notice-title strings in page order.

    Raises:
        httpx.HTTPStatusError: If the server returns an error status.
    """
    url = 'https://www.gachon.ac.kr/kor/7986/subview.do'
    # Fetch the HTML page with httpx
    with httpx.Client() as client:
        response = client.get(url)
        response.raise_for_status()  # raise on HTTP/network error
    soup = BeautifulSoup(response.text, 'html.parser')
    # Each notice title lives in a <td class="td-subject"> cell.
    cells = soup.find_all('td', 'td-subject')
    # Slicing (instead of indexing 0..count-1) avoids an IndexError when the
    # page lists fewer than `count` notices.
    return [cell.get_text().strip() for cell in cells[:count]]
# Academic calendar
def crawl_academic_schedule():
    """Fetch the academic-calendar entries from the Gachon University site.

    Returns:
        list[str]: One stripped text blob per <tr> row of the calendar table.

    Raises:
        httpx.HTTPStatusError: If the server returns an error status.
    """
    url = 'https://www.gachon.ac.kr/kor/1075/subview.do'
    with httpx.Client() as client:
        response = client.get(url)
        response.raise_for_status()  # raise on HTTP/network error
    soup = BeautifulSoup(response.text, 'html.parser')
    # The calendar content sits in <div class="sche-comt">; assumes that
    # container and its <tbody> are present — TODO confirm if the page changes.
    target = soup.find('div', 'sche-comt')
    rows = target.find('tbody').find_all('tr')
    # Direct iteration replaces the range(len(...)) index loop.
    return [row.get_text().strip() for row in rows]
# Central-library seat availability
def crawl_library_seats(type='READING', libno='2'):
    """Query the library seat-status AJAX endpoint and return its JSON body.

    Args:
        type: Seat category string sent to the endpoint (default 'READING').
            Note: shadows the builtin; kept for caller compatibility.
        libno: Library branch number as a string (default '2').

    Returns:
        The parsed JSON payload from the endpoint.

    Raises:
        httpx.HTTPStatusError: If the server returns an error status.
    """
    params = {
        'type': type,
        'libno': libno,
        # Millisecond timestamp, matching the cache-busting query the site sends.
        'time': int(time.time() * 1000),
    }
    with httpx.Client() as client:
        response = client.get('https://lib.gachon.ac.kr/main/seatAjax', params=params)
        response.raise_for_status()
    return response.json()
# Cafeteria menu
def crawl_cafeteria_menu():
    """Fetch the cafeteria-menu page and return every text node in its table.

    Returns:
        list[str]: All stripped text fragments found inside the
        <div class="table_1"> element; whitespace-only nodes become empty
        strings, matching the original output.

    Raises:
        httpx.HTTPStatusError: If the server returns an error status.
    """
    url = 'https://www.gachon.ac.kr/kor/7350/subview.do'
    # Fetch the HTML page with httpx
    with httpx.Client() as client:
        response = client.get(url)
        response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    # `string=True` replaces the deprecated `text=True` argument (bs4 >= 4.4);
    # behavior is identical: yields every NavigableString under the div.
    fragments = soup.find("div", {"class": "table_1"}).find_all(string=True)
    return [fragment.get_text().strip() for fragment in fragments]