Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added .DS_Store
Binary file not shown.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# Folder of JSON files containing speech data
data/speeches/

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[codz]
Expand Down
Binary file added data/.DS_Store
Binary file not shown.
189,227 changes: 189,227 additions & 0 deletions data/all_speeches.csv

Large diffs are not rendered by default.

405 changes: 405 additions & 0 deletions notebooks/eda.ipynb

Large diffs are not rendered by default.

30 changes: 30 additions & 0 deletions scripts/create_dataframe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import os
from pathlib import Path
import pandas as pd

def build_df_from_json_files(json_folder, output_file):
    """
    Combine all JSON files in `json_folder` into one DataFrame.

    If `output_file` already exists, it is loaded and returned instead of
    rebuilding. Otherwise every `*.json` file in `json_folder` is read as a
    pandas Series (one row per file), the rows are combined, the result is
    written to `output_file` as CSV, and the DataFrame is returned.

    Args:
        json_folder: Directory that holds the `*.json` files.
        output_file: Path of the combined CSV file to load or create.

    Returns:
        pandas.DataFrame: The combined data, either loaded from
        `output_file` or freshly built. When `json_folder` contains no JSON
        files an empty DataFrame is returned and nothing is written.
    """
    output_path = Path(output_file)

    # check if file already exists
    if output_path.exists():
        print("File already exists")
        # The CSV is written with the row index as its first column, so read
        # that column back as the index to round-trip the original frame.
        return pd.read_csv(output_path, index_col=0)

    # if file doesn't exist, combine files within `json_folder` into a dataframe
    print("Building new combined DataFrame from JSON files")
    # Sort so the row order does not depend on filesystem listing order.
    paths = sorted(Path(json_folder).glob("*.json"))
    df = pd.DataFrame([pd.read_json(p, typ="series") for p in paths])

    if not paths:
        # Do not cache an empty CSV: its mere existence would make every
        # later call skip the rebuild.
        print(f"No JSON files found in {json_folder}; nothing saved")
        return df

    # save to path specified by `output_file`, then report success
    df.to_csv(output_path)
    print(f"Combined DataFrame saved to {output_file}")

    return df

# Script entry point: build (or load) the combined speeches CSV.
if __name__ == "__main__":
    # Resolve the data directory relative to this file rather than the
    # current working directory, so the script can be run from anywhere.
    data_dir = Path(__file__).resolve().parent.parent / "data"
    json_folder = data_dir / "speeches"  # source of json files
    output_file = data_dir / "all_speeches.csv"  # path for combined dataframe

    df = build_df_from_json_files(json_folder, output_file)
2 changes: 1 addition & 1 deletion src/mypkg/mymodule.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
def test():
    """Print the module's greeting lines to standard output."""
    greetings = (
        "Hello world! I speak Python!",
        "Hello world! I speak English and Python!",
    )
    for line in greetings:
        print(line)