-
Notifications
You must be signed in to change notification settings - Fork 0
Sourcery refactored master branch #1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -9,8 +9,7 @@ def get_table_download_link(df): | |
| """ | ||
| csv = df.to_csv(index=False) | ||
| b64 = base64.b64encode(csv.encode()).decode() # some strings <-> bytes conversions necessary here | ||
| href = f'<a href="data:file/csv;base64,{b64}">Download csv file</a>' | ||
| return href | ||
| return f'<a href="data:file/csv;base64,{b64}">Download csv file</a>' | ||
|
|
||
| def main(): | ||
| st.image('logo.png', width= 200) | ||
|
|
@@ -45,7 +44,7 @@ def main(): | |
| percentual = st.slider('Escolha o limite de percentual faltante limite para as colunas vocë deseja inputar os dados', min_value=0, max_value=100) | ||
| lista_colunas = list(exploracao[exploracao['NA %'] < percentual]['nomes']) | ||
| select_method = st.radio('Escolha um metodo abaixo :', ('Média', 'Mediana')) | ||
| st.markdown('Você selecionou : ' +str(select_method)) | ||
| st.markdown(f'Você selecionou : {str(select_method)}') | ||
|
Comment on lines
-48
to
+47
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
| if select_method == 'Média': | ||
| df_inputado = df[lista_colunas].fillna(df[lista_colunas].mean()) | ||
| exploracao_inputado = pd.DataFrame({'nomes': df_inputado.columns, 'tipos': df_inputado.dtypes, 'NA #': df_inputado.isna().sum(), | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4,36 +4,42 @@ | |
|
|
||
|
|
||
| def criar_histograma(coluna, df): | ||
| chart = alt.Chart(df, width=600).mark_bar().encode( | ||
| alt.X(coluna, bin=True), | ||
| y='count()', tooltip=[coluna, 'count()'] | ||
| ).interactive() | ||
| return chart | ||
| return ( | ||
| alt.Chart(df, width=600) | ||
| .mark_bar() | ||
| .encode( | ||
| alt.X(coluna, bin=True), y='count()', tooltip=[coluna, 'count()'] | ||
| ) | ||
| .interactive() | ||
| ) | ||
|
Comment on lines
-7
to
+14
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
|
|
||
|
|
||
| def criar_barras(coluna_num, coluna_cat, df): | ||
| bars = alt.Chart(df, width = 600).mark_bar().encode( | ||
| x=alt.X(coluna_num, stack='zero'), | ||
| y=alt.Y(coluna_cat), | ||
| tooltip=[coluna_cat, coluna_num] | ||
| ).interactive() | ||
| return bars | ||
| return ( | ||
| alt.Chart(df, width=600) | ||
| .mark_bar() | ||
| .encode( | ||
| x=alt.X(coluna_num, stack='zero'), | ||
| y=alt.Y(coluna_cat), | ||
| tooltip=[coluna_cat, coluna_num], | ||
| ) | ||
| .interactive() | ||
| ) | ||
|
Comment on lines
-15
to
+27
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
|
|
||
| def criar_boxplot(coluna_num, coluna_cat, df): | ||
| boxplot = alt.Chart(df, width=600).mark_boxplot().encode( | ||
| x=coluna_num, | ||
| y=coluna_cat | ||
| return ( | ||
| alt.Chart(df, width=600) | ||
| .mark_boxplot() | ||
| .encode(x=coluna_num, y=coluna_cat) | ||
| ) | ||
| return boxplot | ||
|
Comment on lines
-23
to
-27
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
|
|
||
| def criar_scatterplot(x, y, color, df): | ||
| scatter = alt.Chart(df, width=800, height=400).mark_circle().encode( | ||
| alt.X(x), | ||
| alt.Y(y), | ||
| color = color, | ||
| tooltip = [x, y] | ||
| ).interactive() | ||
| return scatter | ||
| return ( | ||
| alt.Chart(df, width=800, height=400) | ||
| .mark_circle() | ||
| .encode(alt.X(x), alt.Y(y), color=color, tooltip=[x, y]) | ||
| .interactive() | ||
| ) | ||
|
Comment on lines
-30
to
+42
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
|
|
||
| def cria_correlationplot(df, colunas_numericas): | ||
| cor_data = (df[colunas_numericas]).corr().stack().reset_index().rename(columns={0: 'correlation', 'level_0': 'variable', 'level_1': 'variable2'}) | ||
|
|
@@ -65,53 +71,45 @@ def main(): | |
| col = st.selectbox('Selecione a coluna :', colunas_numericas) | ||
| if col is not None: | ||
| st.markdown('Selecione o que deseja analisar :') | ||
| mean = st.checkbox('Média') | ||
| if mean: | ||
| if mean := st.checkbox('Média'): | ||
| st.markdown(df[col].mean()) | ||
| median = st.checkbox('Mediana') | ||
| if median: | ||
| if median := st.checkbox('Mediana'): | ||
| st.markdown(df[col].median()) | ||
| desvio_pad = st.checkbox('Desvio padrão') | ||
| if desvio_pad: | ||
| if desvio_pad := st.checkbox('Desvio padrão'): | ||
| st.markdown(df[col].std()) | ||
| kurtosis = st.checkbox('Kurtosis') | ||
| if kurtosis: | ||
| if kurtosis := st.checkbox('Kurtosis'): | ||
| st.markdown(df[col].kurtosis()) | ||
| skewness = st.checkbox('Skewness') | ||
| if skewness: | ||
| if skewness := st.checkbox('Skewness'): | ||
| st.markdown(df[col].skew()) | ||
| describe = st.checkbox('Describe') | ||
| if describe: | ||
| if describe := st.checkbox('Describe'): | ||
| st.table(df[colunas_numericas].describe().transpose()) | ||
| st.subheader('Visualização dos dados') | ||
| st.image('https://media.giphy.com/media/Rkoat5KMaw2aOHDduz/giphy.gif', width=200) | ||
| st.markdown('Selecione a visualizacao') | ||
| histograma = st.checkbox('Histograma') | ||
| if histograma: | ||
| if histograma := st.checkbox('Histograma'): | ||
| col_num = st.selectbox('Selecione a Coluna Numerica: ', colunas_numericas,key = 'unique') | ||
| st.markdown('Histograma da coluna : ' + str(col_num)) | ||
| st.markdown(f'Histograma da coluna : {str(col_num)}') | ||
| st.write(criar_histograma(col_num, df)) | ||
| barras = st.checkbox('Gráfico de barras') | ||
| if barras: | ||
| if barras := st.checkbox('Gráfico de barras'): | ||
| col_num_barras = st.selectbox('Selecione a coluna numerica: ', colunas_numericas, key = 'unique') | ||
| col_cat_barras = st.selectbox('Selecione uma coluna categorica : ', colunas_object, key = 'unique') | ||
| st.markdown('Gráfico de barras da coluna ' + str(col_cat_barras) + ' pela coluna ' + col_num_barras) | ||
| st.markdown( | ||
| f'Gráfico de barras da coluna {str(col_cat_barras)} pela coluna ' | ||
| + col_num_barras | ||
| ) | ||
| st.write(criar_barras(col_num_barras, col_cat_barras, df)) | ||
| boxplot = st.checkbox('Boxplot') | ||
| if boxplot: | ||
| if boxplot := st.checkbox('Boxplot'): | ||
| col_num_box = st.selectbox('Selecione a Coluna Numerica:', colunas_numericas,key = 'unique' ) | ||
| col_cat_box = st.selectbox('Selecione uma coluna categorica : ', colunas_object, key = 'unique') | ||
| st.markdown('Boxplot ' + str(col_cat_box) + ' pela coluna ' + col_num_box) | ||
| st.markdown(f'Boxplot {str(col_cat_box)} pela coluna ' + col_num_box) | ||
| st.write(criar_boxplot(col_num_box, col_cat_box, df)) | ||
| scatter = st.checkbox('Scatterplot') | ||
| if scatter: | ||
| if scatter := st.checkbox('Scatterplot'): | ||
| col_num_x = st.selectbox('Selecione o valor de x ', colunas_numericas, key = 'unique') | ||
| col_num_y = st.selectbox('Selecione o valor de y ', colunas_numericas, key = 'unique') | ||
| col_color = st.selectbox('Selecione a coluna para cor', colunas) | ||
| st.markdown('Selecione os valores de x e y') | ||
| st.write(criar_scatterplot(col_num_x, col_num_y, col_color, df)) | ||
| correlacao = st.checkbox('Correlacao') | ||
| if correlacao: | ||
| if correlacao := st.checkbox('Correlacao'): | ||
|
Comment on lines
-68
to
+112
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
| st.markdown('Gráfico de correlação das colunas númericas') | ||
| st.write(cria_correlationplot(df, colunas_numericas)) | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -16,9 +16,7 @@ def read_data(self, etapa_treino=True): | |
| ''' | ||
|
|
||
| if etapa_treino: | ||
| df = pd.read_csv(self.path_train) | ||
| return df | ||
|
|
||
| return pd.read_csv(self.path_train) | ||
|
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
| df = pd.read_csv(self.path_test) | ||
| y = pd.read_csv(self.path_label) | ||
| return df, y | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -35,8 +35,6 @@ def process(self, df, etapa_treino=True): | |
| print('Removing target') | ||
| if etapa_treino == True: | ||
| numeric_features.remove('SalePrice') | ||
| else: | ||
| pass | ||
|
Comment on lines
-38
to
-39
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
| print('Feature encoder') | ||
| print('Feature Normalization and Encoding') | ||
| std_scaler = StandardScaler() | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Function `get_table_download_link` refactored with the following changes:
- Inline variable that is immediately returned (`inline-immediately-returned-variable`)