Skip to content

Commit 80983a9

Browse files
committed
Improved tar file system implementation
* Now supports LongLink headers for long symbolic links
* Now resolves hard links as VFS links to target file/directory
* Separated out file type enum for tar headers instead of adding to unrelated UnixFileType enum
1 parent 4f592fd commit 80983a9

File tree

8 files changed

+294
-50
lines changed

8 files changed

+294
-50
lines changed

Library/DiscUtils.Core/Archives/TarFile.cs

Lines changed: 74 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ public TarFile(Stream fileStream)
7070
}
7171

7272
var record = new TarFileRecord(hdr, _fileStream.Position);
73-
if (hdr.FileType == UnixFileType.TarEntryLongLink &&
73+
if (hdr.FileType == TarFileType.TarEntryLongLink &&
7474
hdr.FileName == "././@LongLink")
7575
{
7676
var buffer = ArrayPool<byte>.Shared.Rent(checked((int)hdr.FileLength));
@@ -199,6 +199,7 @@ public static IEnumerable<TarFileData> EnumerateFiles(Stream archive)
199199
var hdrBuf = StreamUtilities.GetUninitializedArray<byte>(512);
200200

201201
string long_path = null;
202+
string long_link_path = null;
202203

203204
for (;;)
204205
{
@@ -209,12 +210,20 @@ public static IEnumerable<TarFileData> EnumerateFiles(Stream archive)
209210

210211
var hdr = new TarHeader(hdrBuf);
211212

212-
if (long_path is not null)
213+
if (long_path is not null
214+
&& hdr.FileType is not TarFileType.TarEntryLongLink and not TarFileType.TarEntryLongLinkTarget)
213215
{
214216
hdr.FileName = long_path;
215217
long_path = null;
216218
}
217219

220+
if (long_link_path is not null
221+
&& hdr.FileType is not TarFileType.TarEntryLongLink and not TarFileType.TarEntryLongLinkTarget)
222+
{
223+
hdr.LinkName = long_link_path;
224+
long_link_path = null;
225+
}
226+
218227
if (hdr.FileLength == 0 && string.IsNullOrEmpty(hdr.FileName))
219228
{
220229
break;
@@ -224,7 +233,7 @@ public static IEnumerable<TarFileData> EnumerateFiles(Stream archive)
224233
{
225234
yield return new(hdr, source: null);
226235
}
227-
else if (hdr.FileType == UnixFileType.TarEntryLongLink &&
236+
else if (hdr.FileType == TarFileType.TarEntryLongLink &&
228237
hdr.FileName == "././@LongLink")
229238
{
230239
var data = ArrayPool<byte>.Shared.Rent(checked((int)hdr.FileLength));
@@ -248,6 +257,30 @@ public static IEnumerable<TarFileData> EnumerateFiles(Stream archive)
248257
break;
249258
}
250259
}
260+
else if (hdr.FileType == TarFileType.TarEntryLongLinkTarget &&
261+
hdr.FileName == "././@LongLink")
262+
{
263+
var data = ArrayPool<byte>.Shared.Rent(checked((int)hdr.FileLength));
264+
try
265+
{
266+
archive.ReadExactly(data, 0, (int)hdr.FileLength);
267+
268+
long_link_path = EncodingUtilities
269+
.GetLatin1Encoding()
270+
.GetString(TarHeader.ReadNullTerminatedString(data.AsSpan(0, (int)hdr.FileLength)));
271+
}
272+
finally
273+
{
274+
ArrayPool<byte>.Shared.Return(data);
275+
}
276+
277+
var moveForward = (int)(-(hdr.FileLength & 511) & 511);
278+
279+
if (archive.ReadMaximum(hdrBuf, 0, moveForward) < moveForward)
280+
{
281+
break;
282+
}
283+
}
251284
else if (archive.CanSeek)
252285
{
253286
var location = archive.Position;
@@ -300,22 +333,33 @@ public static async IAsyncEnumerable<TarFileData> EnumerateFilesAsync(Stream arc
300333
var hdrBuf = StreamUtilities.GetUninitializedArray<byte>(512);
301334

302335
string long_path = null;
336+
string long_link_path = null;
303337

304338
for (; ; )
305339
{
340+
cancellationToken.ThrowIfCancellationRequested();
341+
306342
if (await archive.ReadMaximumAsync(hdrBuf.AsMemory(0, 512), cancellationToken).ConfigureAwait(false) < 512)
307343
{
308344
break;
309345
}
310346

311347
var hdr = new TarHeader(hdrBuf);
312348

313-
if (long_path is not null)
349+
if (long_path is not null
350+
&& hdr.FileType is not TarFileType.TarEntryLongLink and not TarFileType.TarEntryLongLinkTarget)
314351
{
315352
hdr.FileName = long_path;
316353
long_path = null;
317354
}
318355

356+
if (long_link_path is not null
357+
&& hdr.FileType is not TarFileType.TarEntryLongLink and not TarFileType.TarEntryLongLinkTarget)
358+
{
359+
hdr.LinkName = long_link_path;
360+
long_link_path = null;
361+
}
362+
319363
if (hdr.FileLength == 0 && string.IsNullOrEmpty(hdr.FileName))
320364
{
321365
break;
@@ -325,7 +369,7 @@ public static async IAsyncEnumerable<TarFileData> EnumerateFilesAsync(Stream arc
325369
{
326370
yield return new(hdr, source: null);
327371
}
328-
else if (hdr.FileType == UnixFileType.TarEntryLongLink &&
372+
else if (hdr.FileType == TarFileType.TarEntryLongLink &&
329373
hdr.FileName == "././@LongLink")
330374
{
331375
var data = ArrayPool<byte>.Shared.Rent(checked((int)hdr.FileLength));
@@ -349,6 +393,30 @@ public static async IAsyncEnumerable<TarFileData> EnumerateFilesAsync(Stream arc
349393
break;
350394
}
351395
}
396+
else if (hdr.FileType == TarFileType.TarEntryLongLinkTarget &&
397+
hdr.FileName == "././@LongLink")
398+
{
399+
var data = ArrayPool<byte>.Shared.Rent(checked((int)hdr.FileLength));
400+
try
401+
{
402+
await archive.ReadExactlyAsync(data.AsMemory(0, (int)hdr.FileLength), cancellationToken).ConfigureAwait(false);
403+
404+
long_link_path = EncodingUtilities
405+
.GetLatin1Encoding()
406+
.GetString(TarHeader.ReadNullTerminatedString(data.AsSpan(0, (int)hdr.FileLength)));
407+
}
408+
finally
409+
{
410+
ArrayPool<byte>.Shared.Return(data);
411+
}
412+
413+
var moveForward = (int)(-(hdr.FileLength & 511) & 511);
414+
415+
if (await archive.ReadMaximumAsync(hdrBuf.AsMemory(0, moveForward), cancellationToken).ConfigureAwait(false) < moveForward)
416+
{
417+
break;
418+
}
419+
}
352420
else if (archive.CanSeek)
353421
{
354422
var location = archive.Position;
@@ -378,7 +446,7 @@ public static async IAsyncEnumerable<TarFileData> EnumerateFilesAsync(Stream arc
378446
{
379447
var data = StreamUtilities.GetUninitializedArray<byte>((int)hdr.FileLength);
380448

381-
await archive.ReadExactlyAsync(data, cancellationToken).ConfigureAwait(false);
449+
await archive.ReadExactlyAsync(data.AsMemory(0, data.Length), cancellationToken).ConfigureAwait(false);
382450

383451
datastream = new MemoryStream(data, writable: false);
384452
}
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.Text;
4+
5+
namespace DiscUtils.Archives;
6+
7+
/// <summary>
8+
/// Entry type flag of a tar header, stored as a single byte (TarHeader reads it from header offset 156).
9+
/// </summary>
10+
public enum TarFileType : byte
11+
{
12+
/// <summary>
13+
/// TAR unknown entry (type flag byte is zero).
14+
/// </summary>
15+
TarEntryUnknown = 0,
16+
17+
/// <summary>
18+
/// TAR regular file entry (type flag '0').
19+
/// </summary>
20+
TarEntryRegularFile = (byte)'0',
21+
22+
/// <summary>
23+
/// TAR hard link entry (type flag '1').
24+
/// </summary>
25+
TarEntryLink = (byte)'1',
26+
27+
/// <summary>
28+
/// TAR symbolic link entry (type flag '2').
29+
/// </summary>
30+
TarEntrySymbolicLink = (byte)'2',
31+
32+
/// <summary>
33+
/// TAR character special entry (type flag '3').
34+
/// </summary>
35+
TarEntryCharacter = (byte)'3',
36+
37+
/// <summary>
38+
/// TAR block device entry (type flag '4').
39+
/// </summary>
40+
TarEntryBlock = (byte)'4',
41+
42+
/// <summary>
43+
/// TAR directory entry (type flag '5').
44+
/// </summary>
45+
TarEntryDirectory = (byte)'5',
46+
47+
/// <summary>
48+
/// TAR FIFO special entry (type flag '6').
49+
/// </summary>
50+
TarEntryFifo = (byte)'6',
51+
52+
/// <summary>
53+
/// TAR contiguous file entry (type flag '7').
54+
/// </summary>
55+
TarEntryContiguous = (byte)'7',
56+
57+
/// <summary>
58+
/// TAR long link target entry (type flag 'K'): its data holds the link name that is applied to the entry that follows.
59+
/// </summary>
60+
TarEntryLongLinkTarget = (byte)'K',
61+
62+
/// <summary>
63+
/// TAR long file name entry (type flag 'L'): its data holds the file name that is applied to the entry that follows.
64+
/// </summary>
65+
TarEntryLongLink = (byte)'L',
66+
}

Library/DiscUtils.Core/Archives/TarHeader.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,8 @@ public sealed class TarHeader
3636
public long FileLength { get; }
3737
public DateTimeOffset ModificationTime { get; }
3838
public int CheckSum { get; }
39-
public UnixFileType FileType { get; }
40-
public string LinkName { get; }
39+
public TarFileType FileType { get; }
40+
public string LinkName { get; internal set; }
4141
public string Magic { get; }
4242
public int Version { get; }
4343
public string OwnerName { get; }
@@ -77,7 +77,7 @@ public TarHeader(ReadOnlySpan<byte> buffer)
7777
FileLength = ParseFileLength(buffer.Slice(124, 12));
7878
ModificationTime = DateTimeOffset.FromUnixTimeSeconds((uint)OctalToLong(ReadNullTerminatedString(buffer.Slice(136, 12))));
7979
CheckSum = (int)OctalToLong(ReadNullTerminatedString(buffer.Slice(148, 8)));
80-
FileType = (UnixFileType)buffer[156];
80+
FileType = (TarFileType)buffer[156];
8181
LinkName = latin1Encoding.GetString(ReadNullTerminatedString(buffer.Slice(157, 100)));
8282
Magic = latin1Encoding.GetString(ReadNullTerminatedString(buffer.Slice(257, 6)));
8383
Version = (int)OctalToLong(ReadNullTerminatedString(buffer.Slice(263, 2)));

Library/DiscUtils.Core/Internal/Utilities.cs

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,15 @@
2020
// DEALINGS IN THE SOFTWARE.
2121
//
2222

23+
using DiscUtils.Archives;
2324
using LTRData.Extensions.Buffers;
2425
using LTRData.Extensions.Split;
2526
using System;
2627
using System.Collections.Concurrent;
2728
using System.Collections.Generic;
2829
using System.IO;
2930
using System.Linq;
31+
using System.Runtime.CompilerServices;
3032
using System.Text;
3133
using System.Text.RegularExpressions;
3234

@@ -662,5 +664,40 @@ public static bool StartsWithDirectorySeparator(this string path) =>
662664
public static bool EndsWithDirectorySeparator(this string path) =>
663665
path is not null && path.Length > 0 && (path[path.Length - 1] is '/' or '\\');
664666

665-
#endregion
667+
/// <summary>
/// Maps a set of <see cref="FileAttributes"/> flags to a single <see cref="UnixFileType"/>.
/// </summary>
/// <remarks>
/// Flags are tested in a fixed order, so the first match wins: Directory, then ReparsePoint
/// (reported as Link), then Device (reported as Block). Anything else is reported as Regular.
/// </remarks>
/// <param name="attributes">Attribute flags to classify.</param>
/// <returns>The Unix file type selected by the first matching flag.</returns>
public static UnixFileType ToUnixFileType(this FileAttributes attributes)
668+
{
669+
if (attributes.HasFlag(FileAttributes.Directory))
670+
{
671+
return UnixFileType.Directory;
672+
}
673+
else if (attributes.HasFlag(FileAttributes.ReparsePoint))
674+
{
675+
return UnixFileType.Link;
676+
}
677+
else if (attributes.HasFlag(FileAttributes.Device))
678+
{
679+
// Could be block, char, fifo, socket - no way to distinguish, so Block is used as the stand-in
680+
return UnixFileType.Block;
681+
}
682+
else
683+
{
684+
return UnixFileType.Regular;
685+
}
686+
}
687+
688+
/// <summary>
/// Maps a tar header entry type to the corresponding <see cref="UnixFileType"/>.
/// </summary>
/// <remarks>
/// Regular and contiguous entries map to Regular. Hard link, symbolic link and long link
/// target entries all map to Link. Every value not listed in the switch, including
/// TarEntryUnknown and TarEntryLongLink, maps to None.
/// NOTE(review): classic tar archives may use a zero type flag for regular files; here that
/// value falls through to None - confirm this is what callers expect.
/// </remarks>
/// <param name="entryType">Type flag taken from a tar header.</param>
/// <returns>The matching Unix file type, or None when there is no mapping.</returns>
public static UnixFileType ToUnixFileType(this TarFileType entryType)
689+
{
690+
return entryType switch
691+
{
692+
TarFileType.TarEntryRegularFile or TarFileType.TarEntryContiguous => UnixFileType.Regular,
693+
TarFileType.TarEntryDirectory => UnixFileType.Directory,
694+
TarFileType.TarEntryLink or TarFileType.TarEntrySymbolicLink or TarFileType.TarEntryLongLinkTarget => UnixFileType.Link,
695+
TarFileType.TarEntryCharacter => UnixFileType.Character,
696+
TarFileType.TarEntryBlock => UnixFileType.Block,
697+
TarFileType.TarEntryFifo => UnixFileType.Fifo,
698+
_ => UnixFileType.None,
699+
};
700+
}
701+
702+
#endregion
666703
}

Library/DiscUtils.Core/UnixFileType.cs

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -66,19 +66,4 @@ public enum UnixFileType : byte
6666
/// A unix socket.
6767
/// </summary>
6868
Socket = 0xC,
69-
70-
/// <summary>
71-
/// TAR regular file entry.
72-
/// </summary>
73-
TarEntryRegularFile = 0x30,
74-
75-
/// <summary>
76-
/// TAR directory entry.
77-
/// </summary>
78-
TarEntryDirectory = 0x35,
79-
80-
/// <summary>
81-
/// TAR long file name entry.
82-
/// </summary>
83-
TarEntryLongLink = 0x4C
8469
}

0 commit comments

Comments
 (0)