11#import threading
22import requests
3- import datetime
43import os
54import magic
65import threading
76import time
87import http .client
98from urllib .parse import urljoin
9+ from datetime import datetime , timezone
10+
11+
12+
13+
14+ # GET: store page to the Wayback Machine and respond with a redirect to the snapshot
15+ # POST: store page to the Wayback Machine and respond with the Wayback Machine status page
16+ # tag_jobid = '<script>spn.watchJob("spn2-%s", "/_static/",6000);</script>'
17+ # tag_result_timeout = '<p>The same snapshot had been made %s minutes ago. You can make new capture of this URL after 1 hour.</p>'
18+ # tag_result_success = ' A snapshot was captured. Visit page: <a href="%s">%s</a>'
def _parse_snapshot_time(location):
    """Extract the capture time from a ``.../web/<YYYYMMDDhhmmss>/...`` redirect target.

    Args:
        location: Value of the ``Location`` response header, or None.

    Returns:
        A timezone-aware datetime (tagged UTC, matching the UTC clock it is
        compared against), or None when the header is missing or does not
        carry a parsable 14-digit timestamp.
    """
    if not location:
        return None
    _, marker, tail = location.partition('/web/')
    if not marker:
        return None
    try:
        parsed = datetime.strptime(tail.split('/')[0], '%Y%m%d%H%M%S')
    except ValueError:
        return None
    return parsed.replace(tzinfo=timezone.utc)


def save_page(url: str):
    """
    Saves a webpage to the Wayback Machine.

    Sends a GET request to ``https://web.archive.org/save/<url>`` and reports
    the outcome on the console. On a 302 redirect the snapshot timestamp is
    read from the ``Location`` header and compared with the current UTC time
    to tell a fresh capture from an already existing one.

    Args:
        url (str): The URL of the webpage to be saved.

    Returns:
        None: The function does not return any value. It only prints messages to the console.
    """
    time_format = '%Y-%m-%d %H:%M:%S'

    print("\n Saving page to the Wayback Machine...")
    connection = http.client.HTTPSConnection("web.archive.org")
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36'
    }
    try:
        # No whitespace may follow the URL: http.client rejects request
        # targets that contain spaces or control characters.
        connection.request("GET", f"https://web.archive.org/save/{url}", headers=headers)
        print("\n -----> Request sent")
        response = connection.getresponse()
        response_status = response.status

        location = None
        snapshot_time = None
        if response_status == 302:
            location = response.getheader("Location")
            snapshot_time = _parse_snapshot_time(location)

        if snapshot_time is not None:
            print("\n -----> Response: 302 (redirect to snapshot)")
            # Truncate to whole seconds so the age matches the printed timestamps.
            request_time = datetime.now(timezone.utc).replace(microsecond=0)
            snapshot_timestamp = snapshot_time.strftime(time_format)
            current_timestamp = request_time.strftime(time_format)
            # total_seconds() keeps the days component and the sign of the
            # delta; .seconds would drop days and wrap negative deltas.
            age_seconds = (request_time - snapshot_time).total_seconds()
            timestamp_difference = int(round(age_seconds / 60, 0))

            if timestamp_difference < 1:
                print("\n -----> New snapshot created")
            else:
                # Covers the exactly-one-minute case as well; the remaining
                # wait is clamped so it never goes negative.
                remaining = max(0, 60 - timestamp_difference)
                print(f"\n -----> Snapshot already exists. (1 hour limit) - wait for {remaining} minutes")
                print(f"TIMESTAMP SNAPSHOT: {snapshot_timestamp} ")
                print(f"TIMESTAMP REQUEST : {current_timestamp} ")
                print(f"\n LAST SNAPSHOT BACK: {timestamp_difference} minutes")

            print(f"\n URL: {location} ")

        elif response_status == 404:
            print("\n -----> Response: 404 (not found)")
            print(f"\n FAILED -> URL: {url} ")
        else:
            # Any other status, or a 302 without a usable snapshot location.
            print("\n -----> Response: unexpected")
            print(f"\n FAILED -> URL: {url} ")
    finally:
        # Release the socket even when the request or response handling raises.
        connection.close()
65+
66+
67+
68+
1069
1170def print_result (result_list ):
1271 print ("" )
@@ -16,11 +75,15 @@ def print_result(result_list):
1675 __import__ ('pprint' ).pprint (result_list )
1776 print (f"\n -----> { len (result_list )} snapshots listed" )
1877
78+
79+
80+
81+
1982# create filelist
2083def query_list (url : str , range : int , mode : str ):
2184 print ("\n Querying snapshots..." )
2285 if range :
23- range = datetime .datetime . now ().year - range
86+ range = datetime .now ().year - range
2487 range = "&from=" + str (range )
2588 else :
2689 range = ""
0 commit comments