-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsync.py
More file actions
103 lines (85 loc) · 2.5 KB
/
sync.py
File metadata and controls
103 lines (85 loc) · 2.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''
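One-directional sync from src_dir to dst_dir.

Files are copied from the source tree to the same relative path in the
destination tree. A destination file with the same name and path is
overwritten when its content differs from the source.

TODO:
- Check on Windows
- Removal of (some?) files, such as *.tmp or .*
- A mask that excludes these kinds of files from the sync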
'''
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
import hashlib, shutil, time, os
import threading
# Seconds to wait before syncing a newly created file (I/O, download);
# also used as the timeout of the first stop check in periodic_resync().
waiting_time = 30
# Seconds the main thread sleeps per iteration while idling.
repeat_time = 60
# Seconds periodic_resync() pauses before each full resync.
resync_interval = 3600
# Example Windows paths (disabled):
#src_dir = r"C:\path\to\dir1"
#dst_dir = r"C:\path\to\dir2"
# Source tree: watched by the observer and walked by full_resync().
src_dir = '/home/vasily/test1/'
# Destination tree: sync_file() copies files here, keeping relative paths.
dst_dir = '/home/vasily/test2/'
def get_file_hash(path, block_size=65536):
    """Return the SHA-256 hex digest of the file at *path*.

    The file is opened in binary mode and read in chunks of *block_size*
    bytes. If anything goes wrong while opening or reading, the error is
    printed and None is returned instead of raising.
    """
    digest = hashlib.sha256()
    try:
        with open(path, 'rb') as stream:
            while True:
                chunk = stream.read(block_size)
                if not chunk:
                    # Empty read: end of file reached.
                    break
                digest.update(chunk)
        return digest.hexdigest()
    except Exception as e:
        print(f"Error hashing {path}: {e}")
        return None
def sync_file(src_path):
    """Copy one source file to its mirrored location under dst_dir.

    The destination path is dst_dir joined with the path of *src_path*
    relative to src_dir. Nothing is copied when the source cannot be hashed
    or when the destination already exists with the same hash.

    Filesystem errors raised while creating the destination directory or
    copying the file are printed instead of propagated, so a single failing
    file does not abort the caller (a full resync or the event handler).
    """
    rel_path = os.path.relpath(src_path, src_dir)
    dest_path = os.path.join(dst_dir, rel_path)

    src_hash = get_file_hash(src_path)
    if src_hash is None:
        # get_file_hash() has already printed the reason.
        return

    if os.path.exists(dest_path):
        if get_file_hash(dest_path) == src_hash:
            print(f"Skipped (already exists with same content): {rel_path}")
            return
        print(f"Already exists, content differs, overwriting: {rel_path}")

    try:
        os.makedirs(os.path.dirname(dest_path), exist_ok=True)
        shutil.copy2(src_path, dest_path)
    except OSError as e:
        # Covers permission errors, a source removed after hashing, and
        # shutil.SameFileError (an OSError subclass).
        print(f"Error copying {src_path} -> {dest_path}: {e}")
        return
    print(f"Copied: {src_path} -> {dest_path}")
def full_resync():
    """Walk the whole src_dir tree and hand every file to sync_file()."""
    print("🔁 Running full resync...")
    # Lazy generator: paths are produced and synced in os.walk() order.
    candidates = (
        os.path.join(dirpath, name)
        for dirpath, _subdirs, names in os.walk(src_dir)
        for name in names
    )
    for candidate in candidates:
        sync_file(candidate)
    print("✅ Full resync completed.")
def periodic_resync():
    """Run full_resync() repeatedly until stop_event is set.

    Each cycle pauses for waiting_time and then resync_interval seconds
    before resyncing. Both pauses use stop_event.wait() rather than
    time.sleep(), so setting stop_event ends the thread promptly instead of
    after the remainder of a long sleep.
    """
    while not stop_event.is_set():
        # Event.wait() returns True as soon as the event is set and False
        # when the timeout elapses.
        if stop_event.wait(timeout=waiting_time):
            break
        if stop_event.wait(timeout=resync_interval):
            break
        full_resync()
class FileHandler(FileSystemEventHandler):
    """Event handler that syncs files when a creation event is reported."""

    def on_created(self, event):
        """Sync the created file after pausing for waiting_time seconds.

        Events flagged as directories are ignored. The pause blocks the
        calling thread until it elapses.
        """
        if event.is_directory:
            # Open question from the original author: an empty subdir will
            # not be copied?
            return
        # Presumably gives a file that is still being written or downloaded
        # time to settle before it is hashed - TODO confirm.
        time.sleep(waiting_time)
        sync_file(event.src_path)
# Set on shutdown to tell periodic_resync() to stop.
stop_event = threading.Event()

# Watch src_dir (including subdirectories) and route events to FileHandler.
observer = Observer()
observer.schedule(FileHandler(), path=src_dir, recursive=True)
observer.start()
print("Watching for new files...")

resync_thread = threading.Thread(target=periodic_resync)
try:
    # Initial full sync
    full_resync()
    resync_thread.start()
    # Idle until interrupted; the work happens in the other threads.
    while True:
        time.sleep(repeat_time)
except KeyboardInterrupt:
    print("🛑 Stopping...")
finally:
    # Always shut down, including when the interrupt (or an error) arrives
    # during the initial full sync.
    observer.stop()
    stop_event.set()
    if resync_thread.is_alive():
        # join() on a thread that was never started raises RuntimeError.
        resync_thread.join()
    observer.join()
# Reached only after a handled KeyboardInterrupt; other exceptions propagate
# out of the finally block above.
print("✅ Stopped cleanly.")