Skip to content

Add the capability to read https://-hosted files to dplpy.readers() #66

@tyson-swetnam

Description

@tyson-swetnam

dplpy.readers() should be able to read files both from local disk, e.g., /home/jovyan/data.rwl, and from URLs on paleo database websites, e.g., https://www.ncei.noaa.gov/pub/data/paleo/treering/measurements/africa/morc016.rwl

Minimal modifications using:

import requests
from io import StringIO

def readers(filename: str, skip_lines=0, header=False):
    """Read a .csv or .rwl data file from local disk or from an https:// URL.

    The format is chosen from the end of ``filename``. For a URL, a name
    ending in ".rwl" (case-insensitive) is parsed as RWL and anything else
    is parsed as CSV. For a local path, the name must end in ".csv" or
    ".rwl" (case-insensitive); other names fall through to the error
    handling that is elided in this sketch.

    Args:
        filename: Local path, or a URL starting with "https://".
        skip_lines: Forwarded as ``skiprows`` to ``pd.read_csv`` for CSV
            input and passed to ``process_rwl_pandas`` for local RWL input.
            It is not used for RWL input fetched from a URL.
        header: Passed to ``process_rwl_pandas`` for local RWL input only.

    Returns:
        ``series_data`` after the (elided) post-processing step, or None
        when ``read_rwl`` returns no data for an RWL file fetched from a URL.

    Raises:
        ValueError: If the URL request does not return HTTP status 200.
    """
    # ... existing code ...

    # Determine the file format
    # Only the exact lowercase prefix "https://" is recognised as a URL;
    # anything else (including "http://") is handled as a local path below.
    is_url = filename.startswith("https://")
    # FORMAT is used for the log message and for the URL branch only. A URL
    # that does not end in ".rwl" (e.g. one with a trailing query string) is
    # treated as CSV.
    FORMAT = ".rwl" if filename.lower().endswith(".rwl") else ".csv"

    print("\nAttempting to read input file: " + os.path.basename(filename) + " as " + FORMAT + " format\n")

    # Handling URL input for .CSV and .RWL files
    if is_url:
        # NOTE(review): no timeout is passed here; per the requests docs the
        # call can then wait indefinitely on an unresponsive server.
        # Consider passing timeout=... before merging.
        response = requests.get(filename)
        if response.status_code != 200:
            raise ValueError("Unable to download data from URL")

        # For CSV files
        if FORMAT == ".csv":
            # Wrap the downloaded text in a file-like object for pandas.
            data = StringIO(response.text)
            series_data = pd.read_csv(data, skiprows=skip_lines)

        # For RWL files
        elif FORMAT == ".rwl":
            data = StringIO(response.text)
            rwl_lines = data.readlines()
            # NOTE(review): skip_lines and header are not applied on this
            # path, unlike the local .RWL branch below — confirm intended.
            rwl_data, first_date, last_date = read_rwl(rwl_lines)
            if rwl_data is None:
                return None
            series_data = process_rwl_data(rwl_data, first_date, last_date)

    # Local files: dispatch on the (case-insensitive) extension.
    elif filename.upper().endswith(".CSV"):
        series_data = pd.read_csv(filename, skiprows=skip_lines)
    elif filename.upper().endswith(".RWL"):
        series_data = process_rwl_pandas(filename, skip_lines, header)
    else:
        # ... existing error handling code ...

    # ... existing code for processing series_data ...

    return series_data

# New function to process RWL data from a dictionary into a DataFrame
def process_rwl_data(rwl_data, first_date, last_date):
    """Convert already-parsed RWL data into a DataFrame.

    Placeholder in this proposal: the body is meant to reuse the conversion
    logic that currently lives in ``process_rwl_pandas``, so that the same
    conversion can be applied to RWL content that was not read from a
    local file.

    Args:
        rwl_data: Parsed RWL data, as returned by ``read_rwl``.
        first_date: Second value returned by ``read_rwl`` — presumably the
            earliest date covered by the data; TODO confirm.
        last_date: Third value returned by ``read_rwl`` — presumably the
            latest date covered by the data; TODO confirm.

    Returns:
        The DataFrame built from ``rwl_data``.
    """
    # ... existing logic from process_rwl_pandas function to convert rwl_data to DataFrame ...
    # ... return the DataFrame ...

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions