|
| 1 | +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */ |
| 2 | + |
| 3 | +/* |
| 4 | + * This structure provides a vDSO-style clock to VM guests, exposing the |
| 5 | + * relationship (or lack thereof) between the CPU clock (TSC, timebase, arch |
| 6 | + * counter, etc.) and real time. It is designed to address the problem of |
| 7 | + * live migration, which other clock enlightenments do not. |
| 8 | + * |
| 9 | + * When a guest is live migrated, this affects the clock in two ways. |
| 10 | + * |
| 11 | + * First, even between identical hosts the actual frequency of the underlying |
| 12 | + * counter will change within the tolerances of its specification (typically |
| 13 | + * ±50PPM, or 4 seconds a day). This frequency also varies over time on the |
| 14 | + * same host, but can be tracked by NTP as it generally varies slowly. With |
| 15 | + * live migration there is a step change in the frequency, with no warning. |
| 16 | + * |
| 17 | + * Second, there may be a step change in the value of the counter itself, as |
| 18 | + * its accuracy is limited by the precision of the NTP synchronization on the |
| 19 | + * source and destination hosts. |
| 20 | + * |
| 21 | + * So any calibration (NTP, PTP, etc.) which the guest has done on the source |
| 22 | + * host before migration is invalid, and needs to be redone on the new host. |
| 23 | + * |
| 24 | + * In its most basic mode, this structure provides only an indication to the |
| 25 | + * guest that live migration has occurred. This allows the guest to know that |
| 26 | + * its clock is invalid and take remedial action. For applications that need |
| 27 | + * reliable accurate timestamps (e.g. distributed databases), the structure |
| 28 | + * can be mapped all the way to userspace. This allows the application to see |
| 29 | + * directly for itself that the clock is disrupted and take appropriate |
| 30 | + * action, even when using a vDSO-style method to get the time instead of a |
| 31 | + * system call. |
| 32 | + * |
| 33 | + * In its more advanced mode, this structure can also be used to expose the |
| 34 | + * precise relationship of the CPU counter to real time, as calibrated by the |
| 35 | + * host. This means that userspace applications can have accurate time |
| 36 | + * immediately after live migration, rather than having to pause operations |
| 37 | + * and wait for NTP to recover. This mode does, of course, rely on the |
| 38 | + * counter being reliable and consistent across CPUs. |
| 39 | + * |
| 40 | + * Note that this must be true UTC, never with smeared leap seconds. If a |
| 41 | + * guest wishes to construct a smeared clock, it can do so. Presenting a |
| 42 | + * smeared clock through this interface would be problematic because it |
| 43 | + * actually messes with the apparent counter *period*. A linear smearing |
| 44 | + * of 1 ms per second would effectively tweak the counter period by 1000PPM |
| 45 | + * at the start/end of the smearing period, while a sinusoidal smear would |
| 46 | + * basically be impossible to represent. |
| 47 | + * |
| 48 | + * This structure is offered with the intent that it be adopted into the |
| 49 | + * nascent virtio-rtc standard, as a virtio-rtc that does not address the live |
| 50 | + * migration problem seems a little less than fit for purpose. For that |
| 51 | + * reason, certain fields use precisely the same numeric definitions as in |
| 52 | + * the virtio-rtc proposal. The structure can also be exposed through an ACPI |
| 53 | + * device with the CID "VMCLOCK", modelled on the "VMGENID" device except for |
| 54 | + * the fact that it uses a real _CRS to convey the address of the structure |
| 55 | + * (which should be a full page, to allow for mapping directly to userspace). |
| 56 | + */ |
| 57 | + |
| 58 | +#ifndef __VMCLOCK_ABI_H__ |
| 59 | +#define __VMCLOCK_ABI_H__ |
| 60 | + |
| 61 | +#include <linux/types.h> |
| 62 | + |
| 63 | +struct vmclock_abi { /* Clock record exposed to VM guests; multi-byte fields use little-endian __leNN types */ |
| 64 | + /* CONSTANT FIELDS */ |
| 65 | + __le32 magic; |
| 66 | +#define VMCLOCK_MAGIC 0x4b4c4356 /* "VCLK" */ |
| 67 | + __le32 size; /* Size of region containing this structure */ |
| 68 | + __le16 version; /* 1 */ |
| 69 | + __u8 counter_id; /* Matches VIRTIO_RTC_COUNTER_xxx except INVALID */ |
| 70 | +#define VMCLOCK_COUNTER_ARM_VCNT 0 |
| 71 | +#define VMCLOCK_COUNTER_X86_TSC 1 |
| 72 | +#define VMCLOCK_COUNTER_INVALID 0xff |
| 73 | + __u8 time_type; /* Matches VIRTIO_RTC_TYPE_xxx */ |
| 74 | +#define VMCLOCK_TIME_UTC 0 /* Since 1970-01-01 00:00:00z */ |
| 75 | +#define VMCLOCK_TIME_TAI 1 /* Since 1970-01-01 00:00:00z */ |
| 76 | +#define VMCLOCK_TIME_MONOTONIC 2 /* Since undefined epoch */ |
| 77 | +#define VMCLOCK_TIME_INVALID_SMEARED 3 /* Not supported */ |
| 78 | +#define VMCLOCK_TIME_INVALID_MAYBE_SMEARED 4 /* Not supported */ |
| 79 | + |
| 80 | + /* NON-CONSTANT FIELDS PROTECTED BY SEQCOUNT LOCK */ |
| 81 | + __le32 seq_count; /* Low bit means an update is in progress */ |
| 82 | + /* |
| 83 | + * This field changes to another non-repeating value when the CPU |
| 84 | + * counter is disrupted, for example on live migration. This lets |
| 85 | + * the guest know that it should discard any calibration it has |
| 86 | + * performed of the counter against external sources (NTP/PTP/etc.). |
| 87 | + */ |
| 88 | + __le64 disruption_marker; |
| 89 | + __le64 flags; |
| 90 | + /* Indicates that the tai_offset_sec field is valid */ |
| 91 | +#define VMCLOCK_FLAG_TAI_OFFSET_VALID (1 << 0) |
| 92 | + /* |
| 93 | + * Optionally used to notify guests of pending maintenance events. |
| 94 | + * A guest which provides latency-sensitive services may wish to |
| 95 | + * remove itself from service if an event is coming up. Two flags |
| 96 | + * indicate the approximate imminence of the event. |
| 97 | + */ |
| 98 | +#define VMCLOCK_FLAG_DISRUPTION_SOON (1 << 1) /* About a day */ |
| 99 | +#define VMCLOCK_FLAG_DISRUPTION_IMMINENT (1 << 2) /* About an hour */ |
| 100 | +#define VMCLOCK_FLAG_PERIOD_ESTERROR_VALID (1 << 3) /* NOTE(review): presumably counter_period_esterror_rate_frac_sec is valid - confirm */ |
| 101 | +#define VMCLOCK_FLAG_PERIOD_MAXERROR_VALID (1 << 4) /* NOTE(review): presumably counter_period_maxerror_rate_frac_sec is valid - confirm */ |
| 102 | +#define VMCLOCK_FLAG_TIME_ESTERROR_VALID (1 << 5) /* NOTE(review): presumably time_esterror_nanosec is valid - confirm */ |
| 103 | +#define VMCLOCK_FLAG_TIME_MAXERROR_VALID (1 << 6) /* NOTE(review): presumably time_maxerror_nanosec is valid - confirm */ |
| 104 | + /* |
| 105 | + * If the MONOTONIC flag is set then (other than leap seconds) it is |
| 106 | + * guaranteed that the time calculated according to this structure at |
| 107 | + * any given moment shall never appear to be later than the time |
| 108 | + * calculated via the structure at any *later* moment. |
| 109 | + * |
| 110 | + * In particular, a timestamp based on a counter reading taken |
| 111 | + * immediately after setting the low bit of seq_count (and the |
| 112 | + * associated memory barrier), using the previously-valid time and |
| 113 | + * period fields, shall never be later than a timestamp based on |
| 114 | + * a counter reading taken immediately before *clearing* the low |
| 115 | + * bit again after the update, using the about-to-be-valid fields. |
| 116 | + */ |
| 117 | +#define VMCLOCK_FLAG_TIME_MONOTONIC (1 << 7) |
| 118 | + |
| 119 | + __u8 pad[2]; /* NOTE(review): no documented meaning here; presumably reserved padding - confirm */ |
| 120 | + __u8 clock_status; |
| 121 | +#define VMCLOCK_STATUS_UNKNOWN 0 |
| 122 | +#define VMCLOCK_STATUS_INITIALIZING 1 |
| 123 | +#define VMCLOCK_STATUS_SYNCHRONIZED 2 |
| 124 | +#define VMCLOCK_STATUS_FREERUNNING 3 |
| 125 | +#define VMCLOCK_STATUS_UNRELIABLE 4 |
| 126 | + |
| 127 | + /* |
| 128 | + * The time exposed through this device is never smeared. This field |
| 129 | + * corresponds to the 'subtype' field in virtio-rtc, which indicates |
| 130 | + * the smearing method. However in this case it provides a *hint* to |
| 131 | + * the guest operating system, such that *if* the guest OS wants to |
| 132 | + * provide its users with an alternative clock which does not follow |
| 133 | + * UTC, it may do so in a fashion consistent with the other systems |
| 134 | + * in the nearby environment. |
| 135 | + */ |
| 136 | + __u8 leap_second_smearing_hint; /* Matches VIRTIO_RTC_SUBTYPE_xxx */ |
| 137 | +#define VMCLOCK_SMEARING_STRICT 0 |
| 138 | +#define VMCLOCK_SMEARING_NOON_LINEAR 1 |
| 139 | +#define VMCLOCK_SMEARING_UTC_SLS 2 |
| 140 | + __le16 tai_offset_sec; /* Actually two's complement signed */ |
| 141 | + __u8 leap_indicator; |
| 142 | + /* |
| 143 | + * This field is based on the VIRTIO_RTC_LEAP_xxx values as defined |
| 144 | + * in the current draft of virtio-rtc, but since smearing cannot be |
| 145 | + * used with the shared memory device, some values are not used. |
| 146 | + * |
| 147 | + * The _POST_POS and _POST_NEG values allow the guest to perform |
| 148 | + * its own smearing during the day or so after a leap second when |
| 149 | + * such smearing may need to continue being applied for a leap |
| 150 | + * second which is now theoretically "historical". |
| 151 | + */ |
| 152 | +#define VMCLOCK_LEAP_NONE 0x00 /* No known nearby leap second */ |
| 153 | +#define VMCLOCK_LEAP_PRE_POS 0x01 /* Positive leap second at EOM */ |
| 154 | +#define VMCLOCK_LEAP_PRE_NEG 0x02 /* Negative leap second at EOM */ |
| 155 | +#define VMCLOCK_LEAP_POS 0x03 /* Set during 23:59:60 second */ |
| 156 | +#define VMCLOCK_LEAP_POST_POS 0x04 |
| 157 | +#define VMCLOCK_LEAP_POST_NEG 0x05 |
| 158 | + |
| 159 | + /* Bit shift for counter_period_frac_sec and its error rate */ |
| 160 | + __u8 counter_period_shift; |
| 161 | + /* |
| 162 | + * Paired values of counter and UTC at a given point in time. |
| 163 | + */ |
| 164 | + __le64 counter_value; |
| 165 | + /* |
| 166 | + * Counter period, and error margin of same. The unit of these |
| 167 | + * fields is 1/2^(64 + counter_period_shift) of a second. |
| 168 | + */ |
| 169 | + __le64 counter_period_frac_sec; |
| 170 | + __le64 counter_period_esterror_rate_frac_sec; |
| 171 | + __le64 counter_period_maxerror_rate_frac_sec; |
| 172 | + |
| 173 | + /* |
| 174 | + * Time according to time_type field above. |
| 175 | + */ |
| 176 | + __le64 time_sec; /* Seconds since time_type epoch */ |
| 177 | + __le64 time_frac_sec; /* Units of 1/2^64 of a second */ |
| 178 | + __le64 time_esterror_nanosec; |
| 179 | + __le64 time_maxerror_nanosec; |
| 180 | +}; |
| 181 | + |
| 182 | +#endif /* __VMCLOCK_ABI_H__ */ |
0 commit comments