diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..08d5692 --- /dev/null +++ b/.gitignore @@ -0,0 +1,199 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be added to the global gitignore or merged into this project gitignore. For a PyCharm +# project, it is recommended to use the project-specific .gitignore file. +.idea/ + +# VS Code +.vscode/ +*.code-workspace + +# macOS +.DS_Store +.AppleDouble +.LSOverride + +# Windows +Thumbs.db +Thumbs.db:encryptable +ehthumbs.db +ehthumbs_vista.db +*.stackdump +[Dd]esktop.ini +$RECYCLE.BIN/ +*.cab +*.msi +*.msix +*.msm +*.msp +*.lnk + +# Project-specific ignores +# Generated CSV output directories +*-output-csvs/ +*-output-csvs-output-midis/ + +# Temporary files +*.tmp +*.temp + +# Audio/MIDI files (optional - uncomment if you don't want to track these) +# *.mid +# *.midi +# *.wav +# *.mp3 +# *.flac diff --git a/example/test/Take_on_Me_-_a-ha.mid b/example/test/Take_on_Me_-_a-ha.mid new file mode 100644 index 0000000..1b41999 Binary files /dev/null and b/example/test/Take_on_Me_-_a-ha.mid differ diff --git a/midi_to_csv.py b/midi_to_csv.py index 013cd87..2a9c24b 100644 --- a/midi_to_csv.py +++ b/midi_to_csv.py @@ -50,16 +50,20 @@ def main(): for file in os.listdir(input_dir): filename = os.fsdecode(file) + # Skip directories and non-MIDI files + file_path = os.path.join(args.input_dir_name, filename) + if os.path.isdir(file_path) or not filename.endswith(".mid"): + continue print("Processing " + filename + " in to " + filename[:-4] + ".csv") - assert filename.endswith(".mid"), "files must be midi files" mf = music21.midi.MidiFile() mf.open(args.input_dir_name + "/" + filename) mf.read() mf.close() - s = music21.midi.translate.midiFileToStream(mf, quantizePost=False).flat #quantize is what rounds all note durations to real music note types, not needed for our application + s = music21.midi.translate.midiFileToStream(mf, quantizePost=False).flatten() #quantize is what rounds all note durations to real music note types, not needed for our application # Convert chords in to notes. # TODO: consider chords as separate objects from notes? Everything's in music21 anyways df = pd.DataFrame(columns=["note_name", "start_time", "duration", "velocity", "tempo"]) + note_data = [] # Collect data first, then create DataFrame at once for g in s.recurse().notes: #print(g) if g.isChord: @@ -78,14 +82,15 @@ def main(): if note.isNote: # print(note.offset) # print(note.duration) - new_df = pd.DataFrame([[note.pitch, round(float(note.offset), 3), round(note.duration.quarterLength, 3), note.volume.velocity, note_tempo]], columns=["note_name", "start_time", "duration", "velocity", "tempo"]) - - df = df.append(new_df, ignore_index=True) + note_data.append([note.pitch, round(float(note.offset), 3), round(note.duration.quarterLength, 3), note.volume.velocity, note_tempo]) #print(df) #print(new_df) #print(note) + # Create DataFrame from collected data + df = pd.DataFrame(note_data, columns=["note_name", "start_time", "duration", "velocity", "tempo"]) + #print(df) df.to_csv(output_dir_name + "/" + filename[:-4] + ".csv")