Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions AD450_math.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
def add(num1, num2):
    """Return the sum of ``num1`` and ``num2``."""
    return num1 + num2

def multiply(num1, num2):
    """Return the product of ``num1`` and ``num2``."""
    return num1 * num2
161 changes: 161 additions & 0 deletions Untitled-1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
'''Question 1: Assume you are organizing transportation for a group of workers and want to assign bikes to them.
Each worker can be assigned at most one bike, and each bike can be assigned to at most one worker.
Each worker i is located at position workers[i], and each bike j is located at position bikes[j].
The distance between a worker and a bike is defined as:
abs(workers[i] - bikes[j])


A worker can only be assigned a bike if the distance is less than or equal to D.
Your goal is to maximize the number of workers who get a bike. Return the maximum number of workers that can be assigned a bike. '''


'''Example 1
Input:
workers = [1, 2, 3], bikes = [2], D = 1

Output:
1

Explanation:
There is only one bike. It can be assigned to only one worker whose distance from the bike is less than or equal to 1.
Example 2

Input:
workers = [1, 2], bikes = [2, 3], D = 1
|1 - 2| = 1
|2 - 3| = 1

Output:
2

Explanation:
Both workers can be assigned bikes within distance 1.
Constraints
1 <= workers.length <= 3 * 10^4
0 <= bikes.length <= 3 * 10^4
0 <= workers[i], bikes[j] <= 10^9



0 <= D <= 10^9


'''


def bikedistance_a(workers, bikes, D):
    """Return the maximum number of workers that can be given a bike.

    Brute-force variant: every worker scans the bikes for the first one that
    is still free and lies within distance ``D``.

    Args:
        workers: Positions of the workers.
        bikes: Positions of the bikes. ``None`` entries are ignored.
        D: Maximum allowed distance ``abs(worker - bike)`` for an assignment.

    Returns:
        The number of workers that receive a bike. The inputs are not
        modified.
    """
    # Visiting workers and bikes in ascending order makes first-fit optimal:
    # a free bike that is too far left of the current worker is also too far
    # left of every later worker, so nothing useful is ever passed over.
    available_bikes = sorted(bike for bike in bikes if bike is not None)
    taken = [False] * len(available_bikes)

    num_of_bikes = 0
    for worker in sorted(workers):
        for index, bike in enumerate(available_bikes):
            if not taken[index] and abs(worker - bike) <= D:
                # Mark the bike as used so no other worker can claim it.
                taken[index] = True
                num_of_bikes += 1
                break

    return num_of_bikes



def bikedistance_b(workers, bikes, D):
    """Return the maximum number of workers that can be given a bike.

    Two-pointer variant: both position lists are sorted and walked once, so
    the work is dominated by the two sorts.

    Args:
        workers: Positions of the workers.
        bikes: Positions of the bikes.
        D: Maximum allowed distance ``abs(worker - bike)`` for an assignment.

    Returns:
        The number of workers that receive a bike. The inputs are not
        modified, so the function can be called repeatedly on the same lists.
    """
    # Work on sorted copies instead of sorting (and overwriting) the caller's
    # lists.
    sorted_workers = sorted(workers)
    sorted_bikes = sorted(bikes)

    worker_index = 0
    bike_index = 0
    num_of_bikes = 0

    while worker_index < len(sorted_workers) and bike_index < len(sorted_bikes):
        worker = sorted_workers[worker_index]
        bike = sorted_bikes[bike_index]

        if abs(worker - bike) <= D:
            # Match found: both the worker and the bike are consumed.
            num_of_bikes += 1
            worker_index += 1
            bike_index += 1
        elif worker < bike:
            # The bike is too far to the right; later bikes are even farther,
            # so this worker cannot be served.
            worker_index += 1
        else:
            # The bike is too far to the left of this and all later workers.
            bike_index += 1

    return num_of_bikes





'''

Question 2
Design a stack that supports push, pop, top, and retrieving the minimum element in constant time.
Implement the MinStack class:
MinStack() initializes the stack object.

void push(int val) pushes the element val onto the stack.

void pop() removes the element on the top of the stack.

int top() gets the top element of the stack.

int getMin() retrieves the minimum element in the stack.

You must implement a solution with O(1) time complexity for each function.


Example 1:
Input -
["MinStack","push","push","push","getMin","pop","top","getMin"]


[[],[-2],[0],[-3],[],[],[],[]]

Output -
[null,null,null,null,-3,null,0,-2]


Explanation:
MinStack minStack = new MinStack();
minStack.push(-2);
minStack.push(0);
minStack.push(-3);
minStack.getMin(); // return -3
minStack.pop();
minStack.top(); // return 0
minStack.getMin(); // return -2


Constraints:
-2^31 <= val <= 2^31 - 1
The pop, top and getMin operations will always be called on non-empty stacks.
At most 3 * 10^4 calls will be made to push, pop, top, and getMin.

'''


class MinStack:
    """Stack supporting push, pop, top and minimum retrieval in O(1) each.

    Two parallel lists are kept: ``stack`` holds the pushed values and
    ``min_stack`` holds, at the same index, the minimum of all values up to
    and including that position. Both lists always have the same length.
    """

    def __init__(self):
        """Create an empty stack."""
        self.stack = []
        self.min_stack = []

    def push(self, val):
        """Push ``val`` and record the minimum of the stack including it."""
        self.stack.append(val)
        if self.min_stack:
            val = min(val, self.min_stack[-1])
        self.min_stack.append(val)

    def pop(self):
        """Remove the top element.

        Raises:
            IndexError: If the stack is empty.
        """
        self.stack.pop()
        # Keep the minimum history in step with the value stack.
        self.min_stack.pop()

    def top(self):
        """Return the top element without removing it.

        Raises:
            IndexError: If the stack is empty.
        """
        return self.stack[-1]

    def getMin(self):
        """Return the smallest element currently on the stack.

        Raises:
            IndexError: If the stack is empty.
        """
        return self.min_stack[-1]









178 changes: 178 additions & 0 deletions data_processing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
import pandas as pd
import calendar, datetime
import unicodedata

# Month name -> month number. calendar.month_name[0] is the empty string, so
# '' maps to 0 and the real months map to 1..12.
MONTH_DICT = {
    month_name: month_number
    for month_number, month_name in enumerate(calendar.month_name)
}

# Per-athlete aggregate columns produced by agg_join_data and filled by
# fill_features.
FEATURE_COLUMNS = [
    'lead_avg_rank',
    'lead_avg_points',
    'lead_count',
    'boulder_avg_rank',
    'boulder_count',
    'speed_avg_rank',
    'speed_count',
]
# Lookup table read once, at import time, from a path relative to the current
# working directory. agg_join_data merges it onto the competition rows using
# the 'full_name' column.
GENDER_DF = pd.read_csv('ifsc_climbing_data/genders.csv')

def rename_columns(df):
    """Return ``df`` with lower-cased, underscore-separated column names.

    Note: a second ``rename_columns`` is defined later in this module and
    rebinds the name, so after import callers get that later version.

    Args:
        df: DataFrame whose column labels are strings.

    Returns:
        A new DataFrame; ``df`` itself is left unchanged.
    """
    rename_dict = {name: name.lower().replace(' ', '_') for name in df.columns}
    return df.rename(columns=rename_dict)

def remove_youth(df):
    """Drop rows whose ``competition_title`` contains 'youth' (any case).

    Args:
        df: DataFrame with a string ``competition_title`` column.

    Returns:
        The rows of ``df`` whose title does not mention 'youth'. Rows with a
        missing title are kept.
    """
    # na=False makes missing titles count as "not youth" so the mask stays
    # purely boolean and can be negated safely.
    is_youth = df.competition_title.str.lower().str.contains('youth', na=False)
    return df[~is_youth]


def get_end_day(string):
    """Return the end day from a six-token, space-separated date range.

    The string is split on single spaces. When that yields exactly six
    tokens, the fourth token is returned as an integer; otherwise ``None``.

    NOTE(review): this presumably targets strings shaped like
    '12 July - 14 July 2019' -- confirm against the real data.

    Args:
        string: Date text to parse.

    Returns:
        The end day as ``int``, or ``None`` when the string does not have
        six tokens.

    Raises:
        ValueError: If the fourth of six tokens is not an integer.
    """
    # str.split(' ') matches re.split(' ', ...) for a literal single space and
    # avoids relying on the `re` module, which this file never imports.
    value_list = string.split(' ')

    if len(value_list) != 6:
        return None
    return int(value_list[3])

def date_create(df):
    """Derive year, month, day and ``start_date`` from ``competition_date``.

    Adds these columns:
        year: integer from the last four characters of the date string.
        month_string: the first run of ASCII letters in the date string.
        month: ``month_string`` mapped through ``MONTH_DICT``.
        day: integer from the first run of digits in the date string.
        start_date: datetime assembled from year, month and day.

    Args:
        df: DataFrame with a string ``competition_date`` column.

    Returns:
        A copy of ``df`` with the columns above added. The input frame is
        not modified.
    """
    # Copy first: callers pass in filtered slices, and adding columns to
    # those in place would mutate (or warn about) the caller's data.
    df = df.copy()
    date_text = df['competition_date']

    df['year'] = date_text.str.slice(start=-4).astype('int')
    # [A-Za-z] rather than [A-z]: the latter also matches '[', '\', ']', '^',
    # '_' and '`', which sit between 'Z' and 'a' in ASCII.
    df['month_string'] = date_text.str.extract(r'([A-Za-z]+)', expand=False)
    df['month'] = df['month_string'].map(MONTH_DICT)
    df['day'] = date_text.str.extract(r'(\d+)', expand=False).astype('int')
    df['start_date'] = pd.to_datetime(df[['year', 'month', 'day']])

    return df

def date_filter(df, date):
    """Return the rows of ``df`` whose ``start_date`` is strictly before ``date``.

    Args:
        df: DataFrame with a ``start_date`` column.
        date: Cut-off value comparable with the ``start_date`` column.

    Returns:
        The filtered DataFrame.
    """
    return df[df.start_date < date]

def data_cleaning(df, filter_dates=True, date=None):
    """Run the standard cleaning pipeline on a raw results frame.

    Steps, in order: rename columns, drop duplicate rows, remove youth
    competitions, derive date columns, and optionally keep only rows that
    start before ``date``.

    Args:
        df: Raw results DataFrame.
        filter_dates: When true, apply ``date_filter`` as the last step.
        date: Cut-off handed unchanged to ``date_filter``. It is only used
            when ``filter_dates`` is true, and should be supplied in that
            case.

    Returns:
        The cleaned DataFrame.
    """
    df_renamed = rename_columns(df)
    df_dedup = df_renamed.drop_duplicates()
    df_adult = remove_youth(df_dedup)
    df_date = date_create(df_adult)

    if not filter_dates:
        return df_date
    return date_filter(df_date, date)

def _aggregate_discipline(results, prefix, join_col, include_points=False):
    """Return per-athlete mean rank (optionally mean points) and entry count."""
    mean_columns = ['rank', 'points'] if include_points else ['rank']

    means = results[join_col + mean_columns].groupby(join_col).mean().reset_index()
    counts = results[join_col + ['rank']].groupby(join_col).count().reset_index()

    # Both frames carry a 'rank' column, so the merge suffixes them:
    # rank_x is the mean, rank_y is the count.
    new_names = {'rank_x': f'{prefix}_avg_rank', 'rank_y': f'{prefix}_count'}
    if include_points:
        new_names['points'] = f'{prefix}_avg_points'

    return means.merge(counts, on=join_col).rename(columns=new_names)


def agg_join_data(lr_df, br_df, sr_df, predicting_comp):
    """Attach per-athlete lead, boulder and speed aggregates to a competition.

    Each results frame is grouped by athlete ('first', 'last') to get the
    average rank and the number of entries; lead results also contribute
    average points. The aggregates are left-joined onto ``predicting_comp``
    and the result is inner-joined with ``GENDER_DF`` on 'full_name'
    (built here as ``last + ' ' + first``).

    Args:
        lr_df: Lead results with 'first', 'last', 'rank' and 'points' columns.
        br_df: Boulder results with 'first', 'last' and 'rank' columns.
        sr_df: Speed results with 'first', 'last' and 'rank' columns.
        predicting_comp: Rows of the competition to build features for.

    Returns:
        DataFrame with 'first', 'last', 'nation', 'rank', 'gender' and the
        ``FEATURE_COLUMNS``. ``predicting_comp`` is not modified.
    """
    join_col = ['first', 'last']

    ld_agg_df = _aggregate_discipline(lr_df, 'lead', join_col, include_points=True)
    br_agg_df = _aggregate_discipline(br_df, 'boulder', join_col)
    sr_agg_df = _aggregate_discipline(sr_df, 'speed', join_col)

    # assign() returns a new frame, so the caller's (possibly sliced) frame
    # does not gain a column as a side effect.
    predicting_comp = predicting_comp.assign(
        full_name=predicting_comp['last'] + ' ' + predicting_comp['first']
    )

    joined = (
        predicting_comp
        .merge(ld_agg_df, how='left', on=join_col)
        .merge(br_agg_df, how='left', on=join_col)
        .merge(sr_agg_df, how='left', on=join_col)
        .merge(GENDER_DF, on=['full_name'])
    )

    return joined[['first', 'last', 'nation', 'rank', 'gender'] + FEATURE_COLUMNS]

def create_fill_value(column, value):
    """Return the value used to fill missing entries of ``column``.

    Args:
        column: Column name.
        value: Fallback fill value for columns that are not counts.

    Returns:
        ``0`` when the column name contains 'count', otherwise ``value``.
    """
    return 0 if 'count' in column else value

def fill_features(df):
    """Fill missing values in the feature columns.

    Count columns are filled with 0; every other feature column is filled
    with that column's maximum value in ``df`` (see ``create_fill_value``).

    Args:
        df: DataFrame containing all ``FEATURE_COLUMNS``.

    Returns:
        A new DataFrame with the missing feature values filled.
    """
    max_values = df[FEATURE_COLUMNS].max()

    fill_dict = {
        column: create_fill_value(column, value)
        for column, value in max_values.items()
    }

    return df.fillna(value=fill_dict)

def process_data(br_raw, lr_raw, sr_raw, cr_raw, date, comp_name):
    """Build the feature table for one competition.

    The boulder, lead and speed results are cleaned and restricted to rows
    starting before ``date``. The combined results are cleaned without a date
    filter and narrowed to the competition named ``comp_name``. Per-athlete
    aggregates are then joined on, missing features are filled, and an
    ``avg_rank_multi`` column (product of the three average ranks) is added.

    Args:
        br_raw: Raw boulder results.
        lr_raw: Raw lead results.
        sr_raw: Raw speed results.
        cr_raw: Raw results containing the competition to predict.
        date: Cut-off applied to the boulder, lead and speed results.
        comp_name: Exact ``competition_title`` of the competition to predict.

    Returns:
        DataFrame of athletes in the chosen competition with their features.
    """
    br_df = data_cleaning(br_raw, date=date)
    lr_df = data_cleaning(lr_raw, date=date)
    sr_df = data_cleaning(sr_raw, date=date)

    # The target competition must not be removed by the date cut-off.
    cr_df = data_cleaning(cr_raw, filter_dates=False)
    predicting_comp = cr_df[cr_df.competition_title == comp_name]

    pred_aggs_raw = agg_join_data(lr_df, br_df, sr_df, predicting_comp)
    pred_aggs = fill_features(pred_aggs_raw)

    pred_aggs['avg_rank_multi'] = (
        pred_aggs.lead_avg_rank
        * pred_aggs.boulder_avg_rank
        * pred_aggs.speed_avg_rank
    )

    return pred_aggs

def rename_columns(df_raw):
    """Return a copy of ``df_raw`` with normalised column names.

    Each name is lower-cased, stripped of surrounding whitespace, has its
    spaces replaced by underscores, and is reduced to ASCII by decomposing
    accented characters (NFKD) and dropping whatever is not ASCII.

    This definition rebinds the ``rename_columns`` name defined earlier in
    this module.

    Args:
        df_raw: DataFrame whose column labels are strings.

    Returns:
        A new DataFrame; ``df_raw`` is left unchanged.
    """
    df_columns_renamed = df_raw.copy()

    tidy_names = (
        df_columns_renamed.columns
        .str.lower()
        .str.strip()
        .str.replace(' ', '_')
    )

    df_columns_renamed.columns = [
        unicodedata.normalize('NFKD', name).encode('ASCII', 'ignore').decode('utf-8')
        for name in tidy_names
    ]

    return df_columns_renamed

def remove_fully_null_columns_rows(df_columns_renamed):
    """Drop columns, then rows, that contain only missing values.

    Columns are removed first, so a row counts as empty when all of its
    remaining columns are missing.

    Args:
        df_columns_renamed: Input DataFrame.

    Returns:
        A new DataFrame; the input is left unchanged.
    """
    # dropna already returns a new frame, so no explicit copy is needed.
    without_empty_columns = df_columns_renamed.dropna(axis=1, how='all')
    return without_empty_columns.dropna(axis=0, how='all')

def clean_and_fill_content_rating(df_fully_null_removed):
    """Normalise the ``content_rating`` column to use 'Unrated'.

    Missing ratings and the literal 'Not Rated' are both replaced by
    'Unrated'.

    Args:
        df_fully_null_removed: DataFrame with a ``content_rating`` column.

    Returns:
        A new DataFrame; the input is left unchanged.
    """
    df_clean_content_rating = df_fully_null_removed.copy()

    df_clean_content_rating["content_rating"] = (
        df_clean_content_rating["content_rating"]
        .fillna("Unrated")
        .replace("Not Rated", "Unrated")
    )

    return df_clean_content_rating

def clean_release_year(df_clean):
    """Add two datetime parses of ``release_year`` for comparison.

    Adds these columns:
        release_year_coerce: ``pd.to_datetime`` with ``errors="coerce"``.
        release_year_mixed: the same call with ``format="mixed"``, which
            parses each value on its own.

    Values that cannot be parsed become ``NaT`` in both columns. The
    original ``release_year`` column is kept.

    NOTE(review): if ``release_year`` holds bare integers rather than date
    strings, ``pd.to_datetime`` may not read them as calendar years --
    confirm the input format.

    Args:
        df_clean: DataFrame with a ``release_year`` column.

    Returns:
        A new DataFrame; the input is left unchanged.
    """
    df_clean_release_year_temp = df_clean.copy()
    raw_values = df_clean_release_year_temp["release_year"]

    df_clean_release_year_temp["release_year_coerce"] = pd.to_datetime(
        raw_values, errors="coerce"
    )
    df_clean_release_year_temp["release_year_mixed"] = pd.to_datetime(
        raw_values, errors="coerce", format="mixed"
    )

    return df_clean_release_year_temp

def clean_income(clean_release_year):
    """Convert the free-text ``income`` column to nullable integers.

    Every character other than a digit or '.' is removed (currency symbols,
    spaces, thousands separators, letters). Values left empty become missing,
    and the column is cast to ``Int64``.

    NOTE(review): a value that still contains '.' after stripping is not an
    integer literal and is expected to make the final cast fail -- confirm
    whether decimals occur in the data.

    Args:
        clean_release_year: DataFrame with an ``income`` column. The
            parameter name shadows the module-level function of the same
            name inside this function only.

    Returns:
        A new DataFrame; the input is left unchanged.
    """
    df_clean_income = clean_release_year.copy()

    # A single pass: ',' is not in [0-9.], so it is stripped together with
    # every other non-numeric character.
    digits_only = df_clean_income["income"].astype(str).str.replace(
        r'[^0-9.]+', '', regex=True
    )
    digits_only = digits_only.replace('', None)
    df_clean_income["income"] = digits_only.astype('Int64')

    return df_clean_income



















Loading