Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion decoder.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ This is free and unencumbered software released into the public domain. For more
information, see <http://unlicense.org/> or the accompanying UNLICENSE file.
*/

// Package gedcom provides functions to parse GEDCOM files.
package gedcom

import (
Expand Down Expand Up @@ -31,6 +30,9 @@ func NewDecoder(r io.Reader) *Decoder {
}
}

// LogUnhandledTags turns on reporting of GEDCOM tags that the decoder does
// not recognize. Reports are written to w through a logger that has no prefix
// and uses the log.Lshortfile flag, so each entry carries the file name and
// line number of the logging call. This helps when diagnosing GEDCOM files
// that contain non-standard or vendor-specific tags.
func (d *Decoder) LogUnhandledTags(w io.Writer) {
	const flags = log.Lshortfile
	d.tagLogger = log.New(w, "", flags)
}
Expand Down
115 changes: 115 additions & 0 deletions doc.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
/*
This is free and unencumbered software released into the public domain. For more
information, see <http://unlicense.org/> or the accompanying UNLICENSE file.
*/

/*
Package gedcom provides functions to parse and produce GEDCOM files.

GEDCOM (Genealogical Data Communication) is a standard format used for
exchanging genealogical data between software applications. This package
includes functionality for both parsing existing GEDCOM files and generating
new ones.

# Decoding GEDCOM Files

The package provides a streaming [Decoder] for reading GEDCOM files. Use
[NewDecoder] to create a decoder that reads from an [io.Reader]:

data, err := os.ReadFile("family.ged")
if err != nil {
log.Fatal(err)
}

d := gedcom.NewDecoder(bytes.NewReader(data))
g, err := d.Decode()
if err != nil {
log.Fatal(err)
}

for _, ind := range g.Individual {
if len(ind.Name) > 0 {
fmt.Println(ind.Name[0].Name)
}
}

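The decoder is streaming and can handle large files without loading the entire
contents into memory. The example above reads the whole file with os.ReadFile
only for brevity; to benefit from streaming, pass an open file (or any other
[io.Reader]) to [NewDecoder] instead.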

# Encoding GEDCOM Files

The package also provides an [Encoder] for generating GEDCOM files. Use
[NewEncoder] to create an encoder that writes to an [io.Writer]:

g := &gedcom.Gedcom{
Header: &gedcom.Header{
SourceSystem: gedcom.SystemRecord{
Xref: "MyApp",
ProductName: "My Application",
},
CharacterSet: "UTF-8",
},
Individual: []*gedcom.IndividualRecord{
{
Xref: "I1",
Name: []*gedcom.NameRecord{
{Name: "John /Doe/"},
},
Sex: "M",
},
},
Trailer: &gedcom.Trailer{},
}

f, err := os.Create("output.ged")
if err != nil {
log.Fatal(err)
}
defer f.Close()

enc := gedcom.NewEncoder(f)
if err := enc.Encode(g); err != nil {
log.Fatal(err)
}

# Data Model

The [Gedcom] struct is the top-level container returned by the decoder and
accepted by the encoder. It contains slices of records for individuals,
families, sources, and other GEDCOM record types.

[IndividualRecord] represents a person and contains their names, sex, life
events (birth, death, etc.), family links, and citations.

[FamilyRecord] represents a family unit and links to husband, wife, and
children as [IndividualRecord] pointers.

[EventRecord] is a flexible type used for both events (birth, death, marriage)
and attributes (occupation, residence). The Tag field indicates the event type.

[SourceRecord] and [CitationRecord] handle source citations for genealogical
claims.

# Name Parsing

The [SplitPersonalName] helper function parses GEDCOM-formatted names:

parsed := gedcom.SplitPersonalName("John \"Jack\" /Smith/ Jr.")
// parsed.Given = "John"
// parsed.Nickname = "Jack"
// parsed.Surname = "Smith"
// parsed.Suffix = "Jr."

# User-Defined Tags

GEDCOM allows custom tags prefixed with an underscore. These are captured in
[UserDefinedTag] slices on most record types, preserving vendor-specific
extensions.

# Specification Coverage

This package implements approximately 80% of the GEDCOM 5.5 specification,
which is sufficient for parsing about 99% of real-world GEDCOM files. It has
not been extensively tested with non-ASCII character sets.
*/
package gedcom
9 changes: 8 additions & 1 deletion encoder.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,12 @@ import (
"strings"
)

// An Encoder encodes and writes GEDCOM objects to an input stream.
// Encoder writes GEDCOM-encoded data to an output stream.
// Use [NewEncoder] to create an Encoder and [Encoder.Encode] to write
// a [Gedcom] structure.
//
// The encoder handles GEDCOM line length limits automatically, using
// CONT (continuation) and CONC (concatenation) tags to split long text.
type Encoder struct {
w *bufio.Writer
err error
Expand All @@ -26,6 +31,8 @@ func NewEncoder(w io.Writer) *Encoder {
}
}

// Encode writes the GEDCOM-encoded representation of g to the encoder's output stream.
// It writes the header, all records (individuals, families, sources, etc.), and trailer.
func (e *Encoder) Encode(g *Gedcom) error {
e.header(g.Header)

Expand Down
29 changes: 23 additions & 6 deletions helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,32 @@ import (
"strings"
)

// ParsedName contains the components of a personal name after parsing
// with [SplitPersonalName].
type ParsedName struct {
Full string
Given string
Surname string
Suffix string
Nickname string
Full string // Reconstructed full name without GEDCOM delimiters
Given string // Given name(s) / first name(s)
Surname string // Surname / family name / last name
Suffix string // Name suffix (e.g., "Jr.", "III", "PhD")
Nickname string // Nickname, if present in quotes
}

// SplitPersonalName parses a name in the format "First Name /Surname/ suffix" into its components.
// SplitPersonalName parses a GEDCOM-formatted personal name into its components.
// GEDCOM names use slashes to delimit the surname: "Given Names /Surname/ Suffix".
//
// Examples:
//
// SplitPersonalName("John /Smith/")
// // Returns: Given="John", Surname="Smith"
//
// SplitPersonalName("John \"Jack\" /Smith/ Jr.")
// // Returns: Given="John", Nickname="Jack", Surname="Smith", Suffix="Jr."
//
// SplitPersonalName("Mary Jane /van der Berg/")
// // Returns: Given="Mary Jane", Surname="van der Berg"
//
// The function also handles alternative surnames separated by slashes within
// the surname delimiters (e.g., "/Smith/Smyth/" becomes Surname="Smith/Smyth").
func SplitPersonalName(name string) ParsedName {
name = strings.TrimSpace(name)

Expand Down
36 changes: 25 additions & 11 deletions scanner.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,23 +11,32 @@ import (
"strconv"
)

// Line represents a single line from a GEDCOM file after tokenization.
// A GEDCOM line has the format: "level [xref] tag [value]"
// For example: "0 @I1@ INDI" or "1 NAME John /Smith/"
type Line struct {
Level int
Tag string
Value string
Xref string
LineNumber int // the line number of the input file
Offset int // the character offset in the input file
Level int // Hierarchy level (0 for top-level records)
Tag string // GEDCOM tag (e.g., "INDI", "NAME", "BIRT")
Value string // Optional value following the tag
Xref string // Optional cross-reference identifier (e.g., "I1" from "@I1@")
LineNumber int // Line number in the input file (1-indexed)
Offset int // Character offset in the input file
}

// String returns the line in GEDCOM format: "level [@xref@] tag [value]".
//
// The cross-reference is wrapped in "@" delimiters and emitted only when Xref
// is non-empty. The value, together with its separating space, is emitted only
// when Value is non-empty, so a line without a value has no trailing space
// (for example "0 @I1@ INDI" or "0 TRLR").
func (l *Line) String() string {
	var s string
	if l.Xref != "" {
		s = fmt.Sprintf("%d @%s@ %s", l.Level, l.Xref, l.Tag)
	} else {
		s = fmt.Sprintf("%d %s", l.Level, l.Tag)
	}
	// Append the value only when present; an unconditional " %s" would leave
	// a dangling space on value-less lines.
	if l.Value != "" {
		s += " " + l.Value
	}
	return s
}

// A Scanner is a GEDCOM scanning state machine.
// Scanner tokenizes GEDCOM input line by line. It is a low-level component
// used by [Decoder]. Most users should use [Decoder] directly instead.
//
// A Scanner reads from an [io.RuneScanner] and breaks the input into
// GEDCOM lines, parsing the level, optional cross-reference, tag, and
// optional value from each line.
type Scanner struct {
r io.RuneScanner
err error
Expand All @@ -41,7 +50,9 @@ type Scanner struct {
xref string
}

// NewScanner creates a new Scanner ready for use.
// NewScanner creates a new Scanner that reads from r.
// Use [Scanner.Next] to advance through the input and [Scanner.Line]
// to retrieve the current line after each successful call to Next.
func NewScanner(r io.RuneScanner) *Scanner {
return &Scanner{
r: r,
Expand Down Expand Up @@ -307,10 +318,13 @@ func (s *Scanner) Err() error {
return s.err
}

// ScanErr represents a scanning error with location information.
// It wraps the underlying error and includes the line number and
// character offset where the error occurred.
type ScanErr struct {
Err error
LineNumber int
Offset int
Err error // The underlying error
LineNumber int // Line number where the error occurred (1-indexed)
Offset int // Character offset within the line
}

func (e *ScanErr) Error() string {
Expand Down
Loading