diff --git a/adsdata/process.py b/adsdata/process.py
index da12db1..6ec47d3 100644
--- a/adsdata/process.py
+++ b/adsdata/process.py
@@ -1,4 +1,3 @@
-
 from datetime import datetime
 from collections import defaultdict
 
@@ -19,6 +18,8 @@ def __init__(self, compute_metrics=True, compute_CC = False):
             self.data_dict = data_files
         self.logger = tasks.app.logger
         self.readers = {}
+        self.master_protobuf = self._get_master_nonbib_dict()
+        
 
     def __enter__(self):
         self._open_all()
@@ -27,6 +28,46 @@ def __enter__(self):
     def __exit__(self, exc_type, exc_value, traceback):
         self._close_all()
 
+    def _get_master_nonbib_dict(self):
+        """Return a newly built, empty template for the new protobuf structure."""
+        return {
+            "identifier": [], # Master Pipeline
+            "links": {
+                "ARXIV": [], # Master Pipeline
+                "DOI": [], # Master Pipeline
+                "DATA": {},  # filled per link_sub_type by _populate_new_links_structure
+                "ESOURCE": {},  # filled per link_sub_type by _populate_new_links_structure
+                "ASSOCIATED": {
+                    "url": [],
+                    "title": [],
+                    "count": 0
+                },
+                "INSPIRE": {
+                    "url": [],
+                    "title": [],
+                    "count": 0
+                },
+                "LIBRARYCATALOG": {
+                    "url": [],
+                    "title": [],
+                    "count": 0
+                },
+                "PRESENTATION": {
+                    "url": [],
+                    "title": [],
+                    "count": 0
+                },
+                "ABSTRACT": False,  # Master Pipeline (presumably set downstream; confirm)
+                "CITATIONS": False, # Master Pipeline
+                "GRAPHICS": False,  # Master Pipeline
+                "METRICS": False,   # Master Pipeline
+                "OPENURL": False,   # Master Pipeline
+                "REFERENCES": False,# Master Pipeline
+                "TOC": False,       # Master Pipeline
+                "COREAD": False     # Master Pipeline
+            }
+        }
+
     def process_bibcodes(self, bibcodes):
         """send nonbib and metrics records to master for the passed bibcodes
         for each bibcode
@@ -54,79 +95,118 @@ def process_bibcodes(self, bibcodes):
         tasks.task_output_metrics.delay(metrics_protos)
 
     def _convert(self, passed):
-        """convert full nonbib dict to what is needed for nonbib protobuf
-        data links values are read from separate files so they are in separate dicts
-            they must be merged into one field for the protobuf
-        a couple fields are summarized
-        some other fields are just copied
-        some fields are deleted
+        """Convert full nonbib dict to what is needed for nonbib protobuf.
+
+        Data links values are read from separate files; they are converted
+        to rows, merged, and folded into the nested 'links' structure. The
+        flat 'data_links_rows' list is dropped from the result afterwards.
+        The method also aggregates 'property' and 'esource', copies plain
+        fields, applies the converter of every computed field, and removes
+        the keys that are not part of the nonbib protobuf.
+
+        Args:
+            passed (dict): raw data keyed by file type; every key must also
+                be a key of self.data_dict
+
+        Returns:
+            dict: processed record, including 'identifier' and 'links'
         """
-        return_value = {}
-        return_value['data_links_rows'] = []
-        return_value['property'] = set()
-        return_value['esource'] = set()
+        # Initialize return structure
+        return_value = {
+            "data_links_rows": [],
+            "property": set(),
+            "esource": set()
+        }
+
         for filetype, value in passed.items():
-            file_properties = self.data_dict[filetype] #data_files[filetype]
+            file_properties = self.data_dict[filetype]
+            default_value = file_properties.get('default_value')
+            extra_values = file_properties.get('extra_values', {})
+
+            # Handle special cases first
             if filetype == 'canonical':
                 return_value['bibcode'] = passed['canonical']
-            if (value is dict and dict and 'property' in value[filetype]):
-                return_value['property'].update(value[filetype]['property'])
-            if (type(file_properties['default_value']) is bool):
+                continue
+
+            if filetype == 'relevance':
+                return_value.update(passed[filetype])  # all keys go to the top level
+                continue
+
+            # Handle boolean fields and TOC
+            if isinstance(default_value, bool):
                 return_value[filetype] = value[filetype]
                 value = value[filetype]
-            if ('extra_values' in file_properties and 'link_type' in file_properties['extra_values'] and value != file_properties['default_value']):
-                # here with one or more real datalinks value(s)
-                # add each data links dict to existing list of dicts
-                # tweak some values (e.g., sub_link_type) in original dict
-                if type(value) is bool or type(value) is dict:
-                    d = self._convert_data_link(filetype, value)
-                    return_value['data_links_rows'].append(d)
-                elif type(value) is list:
-                    for v in value:
-                        d = self._convert_data_link(filetype, v)
-                        return_value['data_links_rows'].append(d)
+
+            # Process data links
+            if 'link_type' in extra_values and value != default_value:
+                # Convert and add data links
+                if isinstance(value, (bool, dict)):
+                    return_value['data_links_rows'].append(
+                        self._convert_data_link(filetype, value))
+                elif isinstance(value, list):
+                    return_value['data_links_rows'].extend(
+                        self._convert_data_link(filetype, v) for v in value)
                 else:
-                    self.logger.error('serious error in process._convert with {} {} {}'.format(filetype, type(value), value))
-
-                if file_properties['extra_values']['link_type'] == 'ESOURCE':
-                    return_value['esource'].add(file_properties['extra_values']['link_sub_type'])
-                return_value['property'].add(file_properties['extra_values']['link_type'])
-                return_value['property'].update(file_properties['extra_values'].get('property', []))
-            elif ('extra_values' in file_properties and value != file_properties['default_value']):
-                if 'property' in file_properties['extra_values']:
-                    return_value['property'].update(file_properties['extra_values']['property'])
-
-            elif value != file_properties['default_value'] or file_properties.get('copy_default', False):
-                # otherwise, copy value
+                    self.logger.error(
+                        f'serious error in process._convert with {filetype} {type(value)} {value}')
+                    continue  # a malformed value adds no link, esource or property
+
+                # Update esource and properties
+                link_type = extra_values['link_type']
+                if link_type == 'ESOURCE':
+                    return_value['esource'].add(extra_values['link_sub_type'])
+                return_value['property'].add(link_type)
+                return_value['property'].update(extra_values.get('property', []))
+
+            # Handle properties; an explicitly empty extra_values also lands here, not in the copy branch
+            elif 'extra_values' in file_properties and value != default_value:
+                if 'property' in extra_values:
+                    return_value['property'].update(extra_values['property'])
+
+            # Copy remaining fields if needed
+            elif value != default_value or file_properties.get('copy_default', False):
                 return_value[filetype] = passed[filetype]
-            if filetype == 'relevance':
-                for k in passed[filetype]:
-                    # simply add all dict value to top level
-                    return_value[k] = passed[filetype][k]
-
+
+        # Add computed properties
         self._add_refereed_property(return_value)
         self._add_article_property(return_value, passed)
+        self._add_data_summary(return_value)
+        self._add_citation_count_fields(return_value, passed)
+
+        # Sort sets
         return_value['property'] = sorted(return_value['property'])
         return_value['esource'] = sorted(return_value['esource'])
-        self._add_data_summary(return_value)
+
+        # Merge and process data links
         return_value['data_links_rows'] = self._merge_data_links(return_value['data_links_rows'])
-        self._add_citation_count_fields(return_value, passed)
 
-        # time for computed fields
-        for k, v in computed_fields.items():
-            f = getattr(self, v['converter_function'], None)
-            if f is None:
-                self.logger.error('serious error in process._covert, expected converter_function {} for field {} not found'.format(v['converter_function'], k))
+        master_template = self._get_master_nonbib_dict()
+
+        # Populate the new protobuf structure with link data
+        self._populate_new_links_structure(return_value['data_links_rows'], master_template)
+
+        # Add computed fields
+        for field_name, field_config in computed_fields.items():
+            converter = getattr(self, field_config['converter_function'], None)
+            if converter:
+                return_value.update(converter(return_value))
             else:
-                x = f(return_value)
-                return_value.update(x)
-
-        # finally, delete the keys not in the nonbib protobuf
-        not_needed = ['author', 'canonical', 'citation', 'deleted', 'deprecated_citation_count', 'doi', 'download', 'item_count', 'nonarticle',
-                      'ocrabstract', 'preprint', 'private', 'pub_openaccess', 'pub2arxiv',
-                      'reads', 'refereed', 'relevance', 'toc']
-        for n in not_needed:
-            return_value.pop(n, None)
+                self.logger.error(
+                    f'serious error in process._convert, expected converter_function '
+                    f'{field_config["converter_function"]} for field {field_name} not found')
+
+        # Remove unused fields
+        unused_fields = {
+            'author', 'canonical', 'citation', 'deleted', 'deprecated_citation_count',
+            'doi', 'download', 'item_count', 'nonarticle', 'ocrabstract', 'preprint',
+            'private', 'pub_openaccess', 'pub2arxiv', 'reads', 'refereed',
+            'relevance', 'toc'
+        }
+        for field in unused_fields:
+            return_value.pop(field, None)
+        return_value.update(master_template)  # adds 'identifier' and 'links'
+        return_value.pop('data_links_rows')  # the flat rows now live under 'links'
+        self.logger.debug('Processed nonbib data: %s', return_value)  # lazy: formatted only if debug is on
         return return_value
 
     def _add_citation_count_fields(self, return_value, original):
@@ -145,11 +225,11 @@ def _add_refereed_property(self, return_value):
         if'REFEREED' not in return_value['property']:
             return_value['property'].add('NOT REFEREED')
 
-    def _add_article_property(self, return_value, d):
-        x = d.get('nonarticle', False)
-        if type(x) is dict:
-            x = x['nonarticle']
-        if x:
+    def _add_article_property(self, return_value, passed):  # adds exactly one of NONARTICLE / ARTICLE
+        nonarticle_value = passed.get('nonarticle', False)  # a missing key is treated as an article
+        if isinstance(nonarticle_value, dict):
+            nonarticle_value = nonarticle_value['nonarticle']  # unwrap the flag from its dict
+        if nonarticle_value:
             return_value['property'].add('NONARTICLE')
         else:
             return_value['property'].add('ARTICLE')
@@ -199,36 +279,50 @@ def _merge_data_links(self, datalinks):
 
     def _convert_data_link(self, filetype, value):
         """convert one data link row"""
-        file_properties = self.data_dict[filetype] #data_files[filetype]
-        d = {}
-        d['link_type'] = file_properties['extra_values']['link_type']
+        # %-style arguments keep logging lazy: formatting only happens when debug is on
+        self.logger.debug('Converting data link: %s', value)
+        file_properties = self.data_dict[filetype]
+
+        link_type = file_properties['extra_values']['link_type']
+        link_sub_type = file_properties['extra_values'].get('link_sub_type', '')
         link_sub_type_suffix = ''
-        if value is dict and 'subparts' in value and 'item_count' in value['subparts']:
-            link_sub_type_suffix = ' ' + str(value['subparts']['item_count'])
-        if value is True:
-            d['link_sub_type'] = file_properties['extra_values']['link_sub_type'] + link_sub_type_suffix
-        elif 'link_sub_type' in value:
-            d['link_sub_type'] = value['link_sub_type'] + link_sub_type_suffix
-        elif 'link_sub_type' in file_properties['extra_values']:
-            d['link_sub_type'] = file_properties['extra_values']['link_sub_type'] + link_sub_type_suffix
-        if type(value) is bool:
-            d['url'] = ['']
-            d['title'] = ['']
-            d['item_count'] = 0
-        elif type(value) is dict:
-            d['url'] = value.get('url', [''])
-            if type(d['url']) is str:
-                d['url'] = [d['url']]
-            d['title'] = value.get('title', [''])
-            if type(d['title']) is str:
-                d['title'] = [d['title']]
-            # if d['title'] == ['']:
-            #    d.pop('title')  # to match old pipeline
-            d['item_count'] = value.get('item_count', 0)
-        else:
-            self.logger.error('serious error in process.convert_data_link: unexpected type for value, filetype = {}, value = {}, type of value = {}'.format(filetype, value, type(value)))
 
-        return d
+        if isinstance(value, dict) and 'subparts' in value and 'item_count' in value['subparts']:
+            link_sub_type_suffix = f" {value['subparts']['item_count']}"  # keep the separating space
+
+        # A sub type named by the row itself wins over the configured default
+        if isinstance(value, dict) and 'link_sub_type' in value:
+            link_sub_type = value['link_sub_type']
+        # Stays '' when neither the row nor the configuration names a sub type
+        link_sub_type += link_sub_type_suffix
+
+        # Defaults describe a boolean link: no url, no title, no items
+        link_data = {
+            'link_type': link_type,
+            'link_sub_type': link_sub_type,
+            'url': [''],
+            'title': [''],
+            'item_count': 0,
+        }
+
+        if isinstance(value, dict):
+            link_data['url'] = value.get('url', [''])
+            link_data['title'] = value.get('title', [''])
+            link_data['item_count'] = value.get('item_count', 0)
+
+            self.logger.debug('Link data before conversion: %s', link_data)
+            if isinstance(link_data['url'], str):
+                link_data['url'] = [link_data['url']]  # callers expect a list of urls
+            if isinstance(link_data['title'], str):
+                link_data['title'] = [link_data['title']]  # callers expect a list of titles
+            self.logger.debug('Link data after conversion: %s', link_data)
+        elif not isinstance(value, bool):
+            self.logger.error(
+                f"Serious error in process.convert_data_link: unexpected type for value, filetype = {filetype}, "
+                f"value = {value}, type of value = {type(value)}"
+            )
+        self.logger.debug('Converted data link: %s', link_data)
+        return link_data
 
     def _read_next_bibcode(self, bibcode):
         """read all the info for the passed bibcode into a dict"""
@@ -324,3 +418,55 @@ def _compute_bibgroup_facet(self, d):
             return {}
         bibgroup_facet = sorted(list(set(bibgroup)))
         return {'bibgroup_facet': bibgroup_facet}
+
+    def _populate_new_links_structure(self, data_links_rows, master_template):
+        """Populate the new protobuf links structure from data_links_rows.
+
+        Maps the flat data_links_rows into the hierarchical links structure.
+        DATA and ESOURCE rows are grouped below their link_type by
+        link_sub_type; ASSOCIATED, INSPIRE, LIBRARYCATALOG and PRESENTATION
+        rows are written straight into the entry of their link_type. Rows of
+        any other link_type are skipped.
+
+        Args:
+            data_links_rows (list): dicts with 'link_type' and optionally
+                'link_sub_type', 'url', 'title' and 'item_count'
+            master_template (dict): structure whose 'links' entry is filled
+                in place; 'links' must already hold every link type above
+
+        Returns:
+            dict: the master_template object that was passed in
+        """
+        # %-style arguments keep logging lazy: the rows are only formatted
+        # when debug output is actually enabled
+        self.logger.debug('Populating new links structure: %s', data_links_rows)
+
+        # Link types whose entries are keyed by link_sub_type
+        sub_typed_links = ('DATA', 'ESOURCE')
+        # Link types that hold url/title/count directly
+        direct_links = ('ASSOCIATED', 'INSPIRE', 'LIBRARYCATALOG', 'PRESENTATION')
+
+        for row in data_links_rows:
+            link_type = row.get('link_type', '')
+
+            if link_type in sub_typed_links:
+                # The entry for a sub type is created the first time it is seen
+                target = master_template['links'][link_type].setdefault(
+                    row.get('link_sub_type', ''),
+                    {'url': [], 'title': [], 'count': 0})
+            elif link_type in direct_links:
+                target = master_template['links'][link_type]
+            else:
+                # Not part of the nested links structure
+                continue
+
+            # A key missing from the row leaves that field untouched
+            target['url'].extend(row.get('url', []))
+            target['title'].extend(row.get('title', []))
+            # Summed rather than overwritten, so rows sharing an entry stay
+            # consistent with the url/title lists, which accumulate as well;
+            # identical to plain assignment when an entry receives one row
+            target['count'] += row.get('item_count', 0)
+
+        self.logger.debug('Populated new links structure: %s', master_template)
+        return master_template
\ No newline at end of file
diff --git a/adsdata/tests/test_process.py b/adsdata/tests/test_process.py
index 3cbb484..080c349 100644
--- a/adsdata/tests/test_process.py
+++ b/adsdata/tests/test_process.py
@@ -1,4 +1,3 @@
-
 import unittest
 from mock import patch, mock_open
 from datetime import datetime
@@ -85,45 +84,60 @@ def test_nonbib_record(self):
         with Processor(compute_metrics=False) as processor, patch('adsputils.load_config', return_value={'INPUT_DATA_ROOT': './test/data1/config/'}):
             d = processor._read_next_bibcode('2003ASPC..295..361M')
             n = processor._convert(d)
-            a = {"read_count": 4, "bibcode": "2003ASPC..295..361M",
-                 'bibgroup': ['Chandra Technical'], 'bibgroup_facet': ['Chandra Technical'],
-                 "data_links_rows": [{"url": ["http://articles.adsabs.harvard.edu/pdf/2003ASPC..295..361M"], "link_type": "ESOURCE", "link_sub_type": "ADS_PDF", 'item_count': 0, 'title': ['']},
-                                     {"url": ["http://articles.adsabs.harvard.edu/full/2003ASPC..295..361M"], "link_type": "ESOURCE", "link_sub_type": "ADS_SCAN", 'item_count': 0, 'title': ['']},
-                                     {"url": [""], "link_type": "TOC", "link_sub_type": "NA", 'item_count': 0, 'title': ['']}],
-                 "esource": ["ADS_PDF", "ADS_SCAN"], "property": ["ADS_OPENACCESS", "ARTICLE", "ESOURCE", "NOT REFEREED", "OPENACCESS", "TOC"], "boost": 0.15, 'citation_count': 0, 'reference_count': 0, 'credit_count': 0, 'mention': ['2020xxxx.soft.....X', '2021yyyy.soft.....Y'], 'mention_count': 2,'norm_cites': 0, 'citation_count_norm': 0.0, 'data': [], 'total_link_counts': 0}
+            a = {'property': ['ADS_OPENACCESS', 'ARTICLE', 'ESOURCE', 'NOT REFEREED', 'OPENACCESS', 'TOC'], 'esource': ['ADS_PDF', 'ADS_SCAN'], 
+                 'bibcode': '2003ASPC..295..361M', 'bibgroup': ['Chandra Technical'], 'boost': 0.15, 'reference_count': 0, 'credit_count': 0, 'mention': ['2020xxxx.soft.....X', '2021yyyy.soft.....Y'], 'mention_count': 2,'read_count': 4, 'norm_cites': 0, 'data': [], 
+                 'total_link_counts': 0, 'citation_count': 0, 'citation_count_norm': 0.0, 
+                 'bibgroup_facet': ['Chandra Technical'], 'identifier': [], 
+                 'links': {'ARXIV': [], 'DOI': [], 'DATA': {}, 
+                           'ESOURCE': {'ADS_PDF': {'url': ['http://articles.adsabs.harvard.edu/pdf/2003ASPC..295..361M'], 'title': [''], 'count': 0}, 
+                                       'ADS_SCAN': {'url': ['http://articles.adsabs.harvard.edu/full/2003ASPC..295..361M'], 'title': [''], 'count': 0}}, 
+                                       'ASSOCIATED': {'url': [], 'title': [], 'count': 0}, 'INSPIRE': {'url': [], 'title': [], 'count': 0}, 
+                                       'LIBRARYCATALOG': {'url': [], 'title': [], 'count': 0}, 'PRESENTATION': {'url': [], 'title': [], 'count': 0}, 
+                                       'ABSTRACT': False,  # Master Pipeline will set to True
+                                       'CITATIONS': False, 
+                                       'GRAPHICS': False,  # Master Pipeline will set to True
+                                       'METRICS': False, 
+                                       'OPENURL': False,   # Master Pipeline will set to True
+                                       'REFERENCES': False, 
+                                       'TOC': False, 
+                                       'COREAD': False}}   # Master Pipeline will set to True
             self.assertEqual(a, n)
+            self._validate_nonbib_structure(n)
 
             d = processor._read_next_bibcode('2004MNRAS.354L..31M')
             v = processor._convert(d)
-            a = {"bibcode": "2004MNRAS.354L..31M",
-                 "simbad_objects": ["3253618 G"],
-                 "read_count": 20,
-                 "data_links_rows": [{"url": ["http://dx.doi.org/10.1111/j.1365-2966.2004.08374.x"], "link_type": "ESOURCE", "link_sub_type": "PUB_HTML", 'item_count': 0, 'title': ['']},
-                                     {"url": ["https://arxiv.org/abs/astro-ph/0405472"], "link_type": "ESOURCE", "link_sub_type": "EPRINT_HTML", 'item_count': 0, 'title': ['']},
-                                     {"url": ["https://academic.oup.com/mnras/pdf-lookup/doi/10.1111/j.1365-2966.2004.08374.x"], "link_type": "ESOURCE", "link_sub_type": "PUB_PDF", 'item_count': 0, 'title': ['']},
-                                     {"url": ["http://articles.adsabs.harvard.edu/pdf/2004MNRAS.354L..31M"], "link_type": "ESOURCE", "link_sub_type": "ADS_PDF", 'item_count': 0, 'title': ['']},
-                                     {"url": ["https://arxiv.org/pdf/astro-ph/0405472"], "link_type": "ESOURCE", "link_sub_type": "EPRINT_PDF", 'item_count': 0, 'title': ['']},
-                                     {"url": ["http://articles.adsabs.harvard.edu/full/2004MNRAS.354L..31M"], "link_type": "ESOURCE", "link_sub_type": "ADS_SCAN", 'item_count': 0, 'title': ['']},
-                                     {"url": ["2004MNRAS.354L..31M", "2005yCat..73549031M"], "title": ["Source Paper", "Catalog Description"], "link_type": "ASSOCIATED", "link_sub_type": "NA", 'item_count': 0},
-                                     {"url": ["http://inspirehep.net/search?p=find+j+MNRAA,354,L31"], "link_type": "INSPIRE", "link_sub_type": "NA", 'item_count': 0, 'title': ['']},
-                                     {"url": ["http://$VIZIER$/viz-bin/VizieR?-source=J/MNRAS/354/L31"], "item_count": 1, "link_type": "DATA", "link_sub_type": "CDS", 'title': ['']},
-                                     {"url": ["https://$NED$/cgi-bin/objsearch?search_type=Search&refcode=2004MNRAS.354L..31M"], "title": ["NED Objects (1953)"], "item_count": 1953, "link_type": "DATA", "link_sub_type": "NED"},
-                                     {"url": ["http://$SIMBAD$/simbo.pl?bibcode=2004MNRAS.354L..31M"], "title": ["SIMBAD Objects (1)"], "item_count": 1, "link_type": "DATA", "link_sub_type": "SIMBAD"},
-                                     {"url": ["http://$VIZIER$/viz-bin/VizieR?-source=J/MNRAS/354/L31"], "item_count": 1, "link_type": "DATA", "link_sub_type": "Vizier", 'title': ['']}],
-                 "norm_cites": 10000,
-                 "data": ["CDS:1", "NED:1953", "SIMBAD:1", "Vizier:1"],
-                 "citation_count_norm": 49.5,
-                 "citation_count": 99,
+            a = {'property': ['ADS_OPENACCESS', 'ARTICLE', 'ASSOCIATED', 'DATA', 'EPRINT_OPENACCESS', 'ESOURCE', 'INSPIRE', 'OPENACCESS', 'PUB_OPENACCESS', 'REFEREED'],
                  "reference": ["2004PhRvL..92q6804N", "1989TSF...171....5T"],
                  "reference_count": 2,
                  "credit": ["2001CoPhC.136..319S"],
                  "credit_count": 1,
                  "mention": ["2020xxxx.soft.....X"],
                  "mention_count": 1,
-                 "property": ["ADS_OPENACCESS", "ARTICLE", "ASSOCIATED", "DATA", "EPRINT_OPENACCESS", "ESOURCE", "INSPIRE", "OPENACCESS", "PUB_OPENACCESS", "REFEREED"],
-                 "total_link_counts": 1956,
-                 "esource": ["ADS_PDF", "ADS_SCAN", "EPRINT_HTML", "EPRINT_PDF", "PUB_HTML", "PUB_PDF"],
-                 "boost": 0.4399999976158142}
+                 'esource': ['ADS_PDF', 'ADS_SCAN', 'EPRINT_HTML', 'EPRINT_PDF', 'PUB_HTML', 'PUB_PDF'], 
+                 'bibcode': '2004MNRAS.354L..31M', 'boost': 0.44, 'read_count': 20, 'norm_cites': 10000, 
+                 'simbad_objects': ['3253618 G'], 'data': ['CDS:1', 'NED:1953', 'SIMBAD:1', 'Vizier:1'], 
+                 'total_link_counts': 1956, 'citation_count': 99, 'citation_count_norm': 49.5, 'identifier': [], 
+                 'links': {'ARXIV': [], 'DOI': [], 'DATA': {'CDS': {'url': ['http://$VIZIER$/viz-bin/VizieR?-source=J/MNRAS/354/L31'], 'title': [''], 'count': 1}, 
+                                                            'NED': {'url': ['https://$NED$/cgi-bin/objsearch?search_type=Search&refcode=2004MNRAS.354L..31M'], 'title': ['NED Objects (1953)'], 'count': 1953}, 
+                                                            'SIMBAD': {'url': ['http://$SIMBAD$/simbo.pl?bibcode=2004MNRAS.354L..31M'], 'title': ['SIMBAD Objects (1)'], 'count': 1}, 
+                                                            'Vizier': {'url': ['http://$VIZIER$/viz-bin/VizieR?-source=J/MNRAS/354/L31'], 'title': [''], 'count': 1}}, 
+                                                    'ESOURCE': {'ADS_PDF': {'url': ['http://articles.adsabs.harvard.edu/pdf/2004MNRAS.354L..31M'], 'title': [''], 'count': 0}, 
+                                                                'ADS_SCAN': {'url': ['http://articles.adsabs.harvard.edu/full/2004MNRAS.354L..31M'], 'title': [''], 'count': 0}, 
+                                                                'PUB_HTML': {'url': ['http://dx.doi.org/10.1111/j.1365-2966.2004.08374.x'], 'title': [''], 'count': 0}, 
+                                                                'EPRINT_HTML': {'url': ['https://arxiv.org/abs/astro-ph/0405472'], 'title': [''], 'count': 0}, 
+                                                                'PUB_PDF': {'url': ['https://academic.oup.com/mnras/pdf-lookup/doi/10.1111/j.1365-2966.2004.08374.x'], 'title': [''], 'count': 0}, 
+                                                                'EPRINT_PDF': {'url': ['https://arxiv.org/pdf/astro-ph/0405472'], 'title': [''], 'count': 0}}, 
+                                                    'ASSOCIATED': {'url': ['2004MNRAS.354L..31M', '2005yCat..73549031M'], 'title': ['Source Paper', 'Catalog Description'], 'count': 0}, 
+                                                    'INSPIRE': {'url': ['http://inspirehep.net/search?p=find+j+MNRAA,354,L31'], 'title': [''], 'count': 0}, 'LIBRARYCATALOG': {'url': [], 'title': [], 'count': 0},
+                                                    'PRESENTATION': {'url': [], 'title': [], 'count': 0},
+                                                    'ABSTRACT': False,  # Master Pipeline will set to True
+                                                    'CITATIONS': False, 
+                                                    'GRAPHICS': False,  # Master Pipeline will set to True
+                                                    'METRICS': False, 
+                                                    'OPENURL': False,   # Master Pipeline will set to True
+                                                    'REFERENCES': False, 
+                                                    'TOC': False, 
+                                                    'COREAD': False}}   # Master Pipeline will set to True
             v_boost = v.pop('boost')
             a_boost = a.pop('boost')
             self.assertAlmostEqual(a_boost, v_boost)
@@ -133,6 +147,107 @@ def test_nonbib_record(self):
         # consider library 1810hdla.book.....V
         # consider inspire 1908PASP...20....1.
 
+    def _validate_nonbib_structure(self, record):
+        """Assert that the nonbib record has all required fields with values of the expected types"""
+
+        # bibcode must be a string and identifier a list
+        self.assertIn('bibcode', record)
+        self.assertIn('identifier', record)
+        self.assertIsInstance(record['bibcode'], str)
+        self.assertIsInstance(record['identifier'], list)
+
+        # Required numeric fields; the check is strict, an int value does not pass as float
+        numeric_fields = {
+            'boost': float,
+            'citation_count': int,
+            'read_count': int,
+            'total_link_counts': int,
+            'norm_cites': int,
+            'citation_count_norm': float
+        }
+        for field, expected_type in numeric_fields.items():
+            self.assertIn(field, record)
+            self.assertIsInstance(record[field], expected_type,
+                                 f"Field {field} should be {expected_type.__name__}")
+
+        # Required array fields (identifier is checked a second time here)
+        required_array_fields = [
+            'property',
+            'esource',
+            'data',
+            'identifier'
+        ]
+
+        # Optional array fields: only type-checked when present in the record
+        optional_array_fields = [
+            'simbad_objects',
+            'grants',
+            'readers',
+            'reference',
+            'ned_objects',
+            'bibgroup',
+            'bibgroup_facet',
+            'gpn',
+            'uat'
+        ]
+
+        # Check required array fields
+        for field in required_array_fields:
+            self.assertIn(field, record)
+            self.assertIsInstance(record[field], list,
+                                 f"Field {field} should be a list")
+
+        # Check optional array fields if present
+        for field in optional_array_fields:
+            if field in record:
+                self.assertIsInstance(record[field], list,
+                                     f"Field {field} should be a list")
+
+        # Validate links structure
+        self.assertIn('links', record)
+        links = record['links']
+        self.assertIsInstance(links, dict)
+
+        # ARXIV and DOI must be present as plain lists
+        for field in ['ARXIV', 'DOI']:
+            self.assertIn(field, links)
+            self.assertIsInstance(links[field], (list))
+
+        # DATA and ESOURCE map each sub type to a url/title/count dict
+        for field in ['DATA', 'ESOURCE']:
+            self.assertIn(field, links)
+            self.assertIsInstance(links[field], dict)
+
+            # If there are subtypes, validate their structure
+            for subtype, value in links[field].items():
+                self.assertIsInstance(value, dict)
+                self.assertIn('url', value)
+                self.assertIsInstance(value['url'], (list))
+                self.assertIn('title', value)
+                self.assertIsInstance(value['title'], (list))
+                self.assertIn('count', value)
+                self.assertIsInstance(value['count'], int)
+
+        # Link types that hold url/title/count directly, without sub types
+        for field in ['ASSOCIATED', 'INSPIRE', 'LIBRARYCATALOG', 'PRESENTATION']:
+            self.assertIn(field, links)
+            self.assertIsInstance(links[field], dict)
+            self.assertIn('url', links[field])
+            self.assertIsInstance(links[field]['url'], (list))
+            self.assertIn('title', links[field])
+            self.assertIsInstance(links[field]['title'], (list))
+            self.assertIn('count', links[field])
+            self.assertIsInstance(links[field]['count'], int)
+
+        # Boolean flags: each must be present and be a real bool
+        boolean_flags = [
+            'ABSTRACT', 'CITATIONS', 'GRAPHICS', 'METRICS',
+            'OPENURL', 'REFERENCES', 'TOC', 'COREAD'
+        ]
+        for field in boolean_flags:
+            self.assertIn(field, links)
+            self.assertIsInstance(links[field], bool, f"Links field {field} should be a boolean")
+
     def test_add_data_summary(self):
         self.maxDiff = None
         with Processor(compute_metrics=False) as processor, patch('adsputils.load_config', return_value={'INPUT_DATA_ROOT': './test/data1/config/'}):
@@ -252,3 +367,87 @@ def test_compute_bibgroup_facet(self):
         self.assertEqual({'bibgroup_facet': ['a']}, p._compute_bibgroup_facet({'bibgroup': ['a']}))
         self.assertEqual({'bibgroup_facet': ['a', 'b']}, p._compute_bibgroup_facet({'bibgroup': ['a', 'b']}))
         self.assertEqual({'bibgroup_facet': ['a', 'b']}, p._compute_bibgroup_facet({'bibgroup': ['a', 'b', 'a']}))
+
+    def test_multiple_bibcodes_no_link_leakage(self):
+        """Verify links don't leak between bibcodes when processing sequentially.
+
+        Two bibcodes are read and converted back to back on one Processor.
+        Each result must expose ESOURCE URLs for its own bibcode only, and the
+        DATA links found for the second bibcode must not appear on the first.
+        """
+        self.maxDiff = None
+
+        with Processor(compute_metrics=False) as processor, patch('adsputils.load_config', return_value={'INPUT_DATA_ROOT': './test/data1/config/'}):
+            # Process bibcode A - has ADS_PDF and ADS_SCAN esources
+            bibcode_a = '2003ASPC..295..361M'
+            d_a = processor._read_next_bibcode(bibcode_a)
+            result_a = processor._convert(d_a)
+
+            # Verify A exposes the ESOURCE subtypes it is expected to have
+            self.assertIn('ESOURCE', result_a['links'])
+            esource_a = result_a['links']['ESOURCE']
+            self.assertIn('ADS_PDF', esource_a)
+            self.assertIn('ADS_SCAN', esource_a)
+
+            # Copy A's URL lists now, before B is processed on the same Processor
+            ads_pdf_urls_a = list(esource_a['ADS_PDF']['url'])
+            ads_scan_urls_a = list(esource_a['ADS_SCAN']['url'])
+
+            # Verify A has only one URL per link type (its own)
+            self.assertEqual(len(ads_pdf_urls_a), 1,
+                             f"Bibcode A should have exactly 1 ADS_PDF URL, got {len(ads_pdf_urls_a)}")
+            self.assertEqual(len(ads_scan_urls_a), 1,
+                             f"Bibcode A should have exactly 1 ADS_SCAN URL, got {len(ads_scan_urls_a)}")
+
+            # Verify URLs contain the correct bibcode
+            self.assertIn(bibcode_a, ads_pdf_urls_a[0])
+            self.assertIn(bibcode_a, ads_scan_urls_a[0])
+
+            # Now process bibcode B - has different esources (includes PUB_HTML, EPRINT_HTML, etc.)
+            bibcode_b = '2004MNRAS.354L..31M'
+            d_b = processor._read_next_bibcode(bibcode_b)
+            result_b = processor._convert(d_b)
+
+            # Verify B has its own ESOURCE links
+            self.assertIn('ESOURCE', result_b['links'])
+            esource_b = result_b['links']['ESOURCE']
+
+            # B should have ADS_PDF and ADS_SCAN (from its own data)
+            self.assertIn('ADS_PDF', esource_b)
+            self.assertIn('ADS_SCAN', esource_b)
+
+            # B should have only its own URLs, NOT A's URLs
+            ads_pdf_urls_b = esource_b['ADS_PDF']['url']
+            ads_scan_urls_b = esource_b['ADS_SCAN']['url']
+
+            # Check that B's URLs don't contain A's bibcode
+            for url in ads_pdf_urls_b:
+                self.assertNotIn(bibcode_a, url,
+                                 f"Bibcode B's ADS_PDF links leaked bibcode A's URL: {url}")
+
+            for url in ads_scan_urls_b:
+                self.assertNotIn(bibcode_a, url,
+                                 f"Bibcode B's ADS_SCAN links leaked bibcode A's URL: {url}")
+
+            # Verify B has its own bibcode in its URLs
+            b_pdf_has_own_bibcode = any(bibcode_b in url for url in ads_pdf_urls_b)
+            b_scan_has_own_bibcode = any(bibcode_b in url for url in ads_scan_urls_b)
+
+            self.assertTrue(b_pdf_has_own_bibcode,
+                            "Bibcode B should have its own bibcode in ADS_PDF URLs")
+            self.assertTrue(b_scan_has_own_bibcode,
+                            "Bibcode B should have its own bibcode in ADS_SCAN URLs")
+
+            # Also verify DATA links don't leak: A is expected to have none, B several.
+            # A's DATA is read only now, after B was converted, so B's links must not show up in it.
+            data_a = result_a['links']['DATA']
+            data_b = result_b['links']['DATA']
+
+            self.assertEqual(len(data_a), 0, "Bibcode A should have no DATA links")
+            self.assertGreater(len(data_b), 0, "Bibcode B should have DATA links")
+
+            # Verify DATA subtypes in B
+            self.assertIn('CDS', data_b)
+            self.assertIn('NED', data_b)
+            self.assertIn('SIMBAD', data_b)
+            self.assertIn('Vizier', data_b)