-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcrawler.py
More file actions
77 lines (58 loc) · 2.11 KB
/
crawler.py
File metadata and controls
77 lines (58 loc) · 2.11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import httpx
from bs4 import BeautifulSoup
import time
# All-campus notices
def crawl_notices(count=5):
    """Fetch the latest notice titles from the Gachon University notice board.

    Args:
        count: Maximum number of titles to return (default 5, matching the
            original behavior).

    Returns:
        list[str]: Stripped notice-title strings in page order.

    Raises:
        httpx.HTTPStatusError: If the server returns an error status.
    """
    url = 'https://www.gachon.ac.kr/kor/7986/subview.do'
    # Fetch the HTML page with httpx
    with httpx.Client() as client:
        response = client.get(url)
        response.raise_for_status()  # raise on HTTP/network error
    soup = BeautifulSoup(response.text, 'html.parser')
    # Each notice title lives in a <td class="td-subject"> cell.
    cells = soup.find_all('td', 'td-subject')
    # Slicing (instead of indexing 0..count-1) avoids an IndexError when the
    # page lists fewer than `count` notices.
    return [cell.get_text().strip() for cell in cells[:count]]
# Academic calendar
def crawl_academic_schedule():
    """Fetch the academic-calendar entries from the Gachon University site.

    Returns:
        list[str]: One stripped text blob per <tr> row of the calendar table.

    Raises:
        httpx.HTTPStatusError: If the server returns an error status.
    """
    url = 'https://www.gachon.ac.kr/kor/1075/subview.do'
    with httpx.Client() as client:
        response = client.get(url)
        response.raise_for_status()  # raise on HTTP/network error
    soup = BeautifulSoup(response.text, 'html.parser')
    # The calendar content sits in <div class="sche-comt">; assumes that
    # container and its <tbody> are present — TODO confirm if the page changes.
    target = soup.find('div', 'sche-comt')
    rows = target.find('tbody').find_all('tr')
    # Direct iteration replaces the range(len(...)) index loop.
    return [row.get_text().strip() for row in rows]
# Central-library seat availability
def crawl_library_seats(type='READING', libno='2'):
    """Query the library seat-status AJAX endpoint and return its JSON body.

    Args:
        type: Seat category string sent to the endpoint (default 'READING').
            Note: shadows the builtin; kept for caller compatibility.
        libno: Library branch number as a string (default '2').

    Returns:
        The parsed JSON payload from the endpoint.

    Raises:
        httpx.HTTPStatusError: If the server returns an error status.
    """
    params = {
        'type': type,
        'libno': libno,
        # Millisecond timestamp, matching the cache-busting query the site sends.
        'time': int(time.time() * 1000),
    }
    with httpx.Client() as client:
        response = client.get('https://lib.gachon.ac.kr/main/seatAjax', params=params)
        response.raise_for_status()
    return response.json()
# Cafeteria menu
def crawl_cafeteria_menu():
    """Fetch the cafeteria-menu page and return every text node in its table.

    Returns:
        list[str]: All stripped text fragments found inside the
        <div class="table_1"> element; whitespace-only nodes become empty
        strings, matching the original output.

    Raises:
        httpx.HTTPStatusError: If the server returns an error status.
    """
    url = 'https://www.gachon.ac.kr/kor/7350/subview.do'
    # Fetch the HTML page with httpx
    with httpx.Client() as client:
        response = client.get(url)
        response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    # `string=True` replaces the deprecated `text=True` argument (bs4 >= 4.4);
    # behavior is identical: yields every NavigableString under the div.
    fragments = soup.find("div", {"class": "table_1"}).find_all(string=True)
    return [fragment.get_text().strip() for fragment in fragments]