From a76c61ca8898db92f58c81073daf3509fdbd36b3 Mon Sep 17 00:00:00 2001 From: tlofano Date: Wed, 20 Nov 2019 19:10:37 -0300 Subject: [PATCH 1/8] Renombres apopiados --- features-NLTK.ipynb => featuresNLTK.ipynb | 207 +- features_complementarias.ipynb | 51 + .../{features-NLTK.html => featuresNLTK.html} | 191 +- html/features_complementarias.html | 13128 ++++++++++++++++ 4 files changed, 13215 insertions(+), 362 deletions(-) rename features-NLTK.ipynb => featuresNLTK.ipynb (55%) create mode 100644 features_complementarias.ipynb rename html/{features-NLTK.html => featuresNLTK.html} (98%) create mode 100644 html/features_complementarias.html diff --git a/features-NLTK.ipynb b/featuresNLTK.ipynb similarity index 55% rename from features-NLTK.ipynb rename to featuresNLTK.ipynb index 09b75af..7d15fdf 100644 --- a/features-NLTK.ipynb +++ b/featuresNLTK.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -28,7 +28,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -40,7 +40,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -61,7 +61,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -87,7 +87,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -120,7 +120,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -130,99 +130,18 @@ "La cantidad de publicaciones con titulo no nulo es: 234613\n", "La cantidad total de publicaciones es: 240000\n" ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idtitulocant_palabras_mas_frecuentes_titulo
0254099depto. tipo a-4021.0
153461condominio horizontal en venta3.0
2247984casa en venta urbi 3 recamaras tonala3.0
3209067casa sola en toluca zinacantepec con credito i...4.0
4185997paseos del sol2.0
\n", - "
" - ], - "text/plain": [ - " id titulo \\\n", - "0 254099 depto. tipo a-402 \n", - "1 53461 condominio horizontal en venta \n", - "2 247984 casa en venta urbi 3 recamaras tonala \n", - "3 209067 casa sola en toluca zinacantepec con credito i... \n", - "4 185997 paseos del sol \n", - "\n", - " cant_palabras_mas_frecuentes_titulo \n", - "0 1.0 \n", - "1 3.0 \n", - "2 3.0 \n", - "3 4.0 \n", - "4 2.0 " - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" } ], "source": [ - "# Ejemplo de uso de las palabras mas frecuentes\n", - "\n", - "df_palabras = feature_cantidad_mas_frecuentes(df_train, 'titulo', 200)\n", - "df_palabras[['id', 'titulo', 'cant_palabras_mas_frecuentes_titulo']].head()" + "def ejemplo():\n", + " # Ejemplo de uso de las palabras mas frecuentes\n", + " df_palabras = feature_cantidad_mas_frecuentes(df_train, 'titulo', 200)\n", + " df_palabras[['id', 'titulo', 'cant_palabras_mas_frecuentes_titulo']].head()" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -248,7 +167,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -281,104 +200,14 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "La cantidad de publicaciones con titulo no nulo es: 234613\n", - "La cantidad total de publicaciones es: 240000\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idtitulocant_palabras_menos_frecuentes_titulo
0254099depto. tipo a-4020.0
153461condominio horizontal en venta0.0
2247984casa en venta urbi 3 recamaras tonala0.0
3209067casa sola en toluca zinacantepec con credito i...0.0
4185997paseos del sol0.0
\n", - "
" - ], - "text/plain": [ - " id titulo \\\n", - "0 254099 depto. tipo a-402 \n", - "1 53461 condominio horizontal en venta \n", - "2 247984 casa en venta urbi 3 recamaras tonala \n", - "3 209067 casa sola en toluca zinacantepec con credito i... \n", - "4 185997 paseos del sol \n", - "\n", - " cant_palabras_menos_frecuentes_titulo \n", - "0 0.0 \n", - "1 0.0 \n", - "2 0.0 \n", - "3 0.0 \n", - "4 0.0 " - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "# Ejemplo de uso de las palabras menos frecuentes\n", - "\n", - "df_palabras = feature_cantidad_menos_frecuentes(df_train, 'titulo', 200)\n", - "df_palabras[['id', 'titulo', 'cant_palabras_menos_frecuentes_titulo']].head()" + "def ejemplo():\n", + " # Ejemplo de uso de las palabras menos frecuentes\n", + " df_palabras = feature_cantidad_menos_frecuentes(df_train, 'titulo', 200)\n", + " df_palabras[['id', 'titulo', 'cant_palabras_menos_frecuentes_titulo']].head()" ] } ], diff --git a/features_complementarias.ipynb b/features_complementarias.ipynb new file mode 100644 index 0000000..6d2085b --- /dev/null +++ b/features_complementarias.ipynb @@ -0,0 +1,51 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[nltk_data] Downloading package stopwords to /home/tomas/nltk_data...\n", + "[nltk_data] Package stopwords is already up-to-date!\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "La cantidad de publicaciones con titulo no nulo es: 234613\n", + "La cantidad total de publicaciones es: 240000\n" + ] + } + ], + "source": [ + "import ipynb.fs.full.featuresNLTK as features_nltk" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": 
".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/html/features-NLTK.html b/html/featuresNLTK.html similarity index 98% rename from html/features-NLTK.html rename to html/featuresNLTK.html index d9ee715..52d3f54 100644 --- a/html/features-NLTK.html +++ b/html/featuresNLTK.html @@ -2,7 +2,7 @@ -features-NLTK +featuresNLTK @@ -13076,7 +13076,7 @@
-
In [1]:
+
In [2]:
import pandas as pd
@@ -13116,7 +13116,7 @@
 
-
In [2]:
+
In [3]:
def ver_info_a_filtrar(df, col):
@@ -13132,7 +13132,7 @@
 
-
In [3]:
+
In [4]:
def generar_palabras_no_queridas(arr=None):
@@ -13157,7 +13157,7 @@
 
-
In [4]:
+
In [5]:
def generar_palabras_mas_frecuentes(df, col, n):
@@ -13187,7 +13187,7 @@
 
-
In [5]:
+
In [6]:
def feature_cantidad_mas_frecuentes(df, col, n):
@@ -13224,13 +13224,13 @@
 
-
In [6]:
+
In [ ]:
-
# Ejemplo de uso de las palabras mas frecuentes
-
-df_palabras = feature_cantidad_mas_frecuentes(df_train, 'titulo', 200)
-df_palabras[['id', 'titulo', 'cant_palabras_mas_frecuentes_titulo']].head()
+
def ejemplo():
+    # Ejemplo de uso de las palabras mas frecuentes
+    df_palabras = feature_cantidad_mas_frecuentes(df_train, 'titulo', 200)
+    df_palabras[['id', 'titulo', 'cant_palabras_mas_frecuentes_titulo']].head()
 
@@ -13253,81 +13253,13 @@
-
- -
Out[6]:
- - - -
-
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idtitulocant_palabras_mas_frecuentes_titulo
0254099depto. tipo a-4021.0
153461condominio horizontal en venta3.0
2247984casa en venta urbi 3 recamaras tonala3.0
3209067casa sola en toluca zinacantepec con credito i...4.0
4185997paseos del sol2.0
-
-
- -
-
-
In [7]:
+
In [ ]:
def generar_palabras_menos_frecuentes(df, col, n):
@@ -13357,7 +13289,7 @@
 
-
In [8]:
+
In [ ]:
def feature_cantidad_menos_frecuentes(df, col, n):
@@ -13394,106 +13326,19 @@
 
-
In [9]:
+
In [ ]:
-
# Ejemplo de uso de las palabras menos frecuentes
-
-df_palabras = feature_cantidad_menos_frecuentes(df_train, 'titulo', 200)
-df_palabras[['id', 'titulo', 'cant_palabras_menos_frecuentes_titulo']].head()
+
def ejemplo():
+    # Ejemplo de uso de las palabras menos frecuentes
+    df_palabras = feature_cantidad_menos_frecuentes(df_train, 'titulo', 200)
+    df_palabras[['id', 'titulo', 'cant_palabras_menos_frecuentes_titulo']].head()
 
-
-
- - -
- -
- - -
-
La cantidad de publicaciones con titulo no nulo es: 234613
-La cantidad total de publicaciones es: 240000
-
-
-
- -
- -
Out[9]:
- - - -
-
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idtitulocant_palabras_menos_frecuentes_titulo
0254099depto. tipo a-4020.0
153461condominio horizontal en venta0.0
2247984casa en venta urbi 3 recamaras tonala0.0
3209067casa sola en toluca zinacantepec con credito i...0.0
4185997paseos del sol0.0
-
-
- -
- -
-
-
diff --git a/html/features_complementarias.html b/html/features_complementarias.html new file mode 100644 index 0000000..eacf36d --- /dev/null +++ b/html/features_complementarias.html @@ -0,0 +1,13128 @@ + + + + +features_complementarias + + + + + + + + + + + + + + + + + + + + + + +
+
+ +
+
+
In [5]:
+
+
+
import ipynb.fs.full.featuresNLTK as features_nltk
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
[nltk_data] Downloading package stopwords to /home/tomas/nltk_data...
+[nltk_data]   Package stopwords is already up-to-date!
+
+
+
+ +
+ +
+ + +
+
La cantidad de publicaciones con titulo no nulo es: 234613
+La cantidad total de publicaciones es: 240000
+
+
+
+ +
+
+ +
+
+
+ + + + + + From fa8f4416370088a640f34ef68bb1b334148c3824 Mon Sep 17 00:00:00 2001 From: tlofano Date: Wed, 20 Nov 2019 19:36:17 -0300 Subject: [PATCH 2/8] One Hot por concatenacion de 2 columnas --- features_complementarias.ipynb | 48 +++++++++++++++----------- html/features_complementarias.html | 54 ++++++++++++++---------------- 2 files changed, 54 insertions(+), 48 deletions(-) diff --git a/features_complementarias.ipynb b/features_complementarias.ipynb index 6d2085b..76edf74 100644 --- a/features_complementarias.ipynb +++ b/features_complementarias.ipynb @@ -2,28 +2,36 @@ "cells": [ { "cell_type": "code", - "execution_count": 5, + "execution_count": 29, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[nltk_data] Downloading package stopwords to /home/tomas/nltk_data...\n", - "[nltk_data] Package stopwords is already up-to-date!\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "La cantidad de publicaciones con titulo no nulo es: 234613\n", - "La cantidad total de publicaciones es: 240000\n" - ] - } - ], + "outputs": [], "source": [ - "import ipynb.fs.full.featuresNLTK as features_nltk" + "import pandas as pd\n", + "import ipynb.fs.full.features as features_nltk\n", + "\n", + "df_train = pd.read_csv('./data/train.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [], + "source": [ + "def concatenar_categoricos(df, columna1, columna2, N=0, df_aux=None, devolver_cols=False):\n", + " '''Concatena dos columnas categoricas y hace one hot'''\n", + " '''Ver referencia columna_a_ohe para los ultimos 3 parametros'''\n", + " \n", + " def limpiar_espacios_blanco(texto):\n", + " return texto.replace(' ', '_')\n", + " \n", + " df = df.copy()\n", + " df[columna1] = df[columna1].apply(limpiar_espacios_blanco)\n", + " df[columna2] = df[columna2].apply(limpiar_espacios_blanco)\n", + " df[columna1 + '_' + columna2] = df[columna1] + '_' +df[columna2]\n", + " 
\n", + " features_nltk\n", + " return features_nltk.columna_a_ohe(df, columna1 + '_' + columna2, N, df_aux, devolver_cols)" ] } ], diff --git a/html/features_complementarias.html b/html/features_complementarias.html index eacf36d..aa4949c 100644 --- a/html/features_complementarias.html +++ b/html/features_complementarias.html @@ -13076,44 +13076,42 @@
-
In [5]:
+
In [29]:
-
import ipynb.fs.full.featuresNLTK as features_nltk
+
import pandas as pd
+import ipynb.fs.full.features as features_nltk
+
+df_train = pd.read_csv('./data/train.csv')
 
-
-
- - -
- -
- - -
-
[nltk_data] Downloading package stopwords to /home/tomas/nltk_data...
-[nltk_data]   Package stopwords is already up-to-date!
-
-
-
- -
- -
- - -
-
La cantidad de publicaciones con titulo no nulo es: 234613
-La cantidad total de publicaciones es: 240000
-
-
+
+
+
In [31]:
+
+
+
def concatenar_categoricos(df, columna1, columna2, N=0, df_aux=None, devolver_cols=False):
+    '''Concatena dos columnas categoricas y hace one hot'''
+    '''Ver referencia columna_a_ohe para los ultimos 3 parametros'''
+    
+    def limpiar_espacios_blanco(texto):
+        return texto.replace(' ', '_')
+        
+    df = df.copy()
+    df[columna1] = df[columna1].apply(limpiar_espacios_blanco)
+    df[columna2] = df[columna2].apply(limpiar_espacios_blanco)
+    df[columna1 + '_' + columna2] = df[columna1] + '_' +df[columna2]
+    
+    features_nltk
+    return features_nltk.columna_a_ohe(df, columna1 + '_' + columna2, N, df_aux, devolver_cols)
+
+
From 074750a6c459f826e6aac92d67811805f2d32b4e Mon Sep 17 00:00:00 2001 From: tlofano Date: Fri, 22 Nov 2019 21:29:04 -0300 Subject: [PATCH 3/8] Cantidad de propiedades con mismo especificacion, agrupado --- features_complementarias.ipynb | 1059 +++++++++++++++++++++++++++- html/features_complementarias.html | 850 +++++++++++++++++++++- 2 files changed, 1905 insertions(+), 4 deletions(-) diff --git a/features_complementarias.ipynb b/features_complementarias.ipynb index 76edf74..b917d62 100644 --- a/features_complementarias.ipynb +++ b/features_complementarias.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 29, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -14,7 +14,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -33,6 +33,1061 @@ " features_nltk\n", " return features_nltk.columna_a_ohe(df, columna1 + '_' + columna2, N, df_aux, devolver_cols)" ] + }, + { + "cell_type": "code", + "execution_count": 88, + "metadata": {}, + "outputs": [], + "source": [ + "def cantidad_propiedad_misma_cantidad(df, agrupar_col, misma_cantidad_col):\n", + " '''Cantidad de casas con la misma cantidad de banos que la actual'''\n", + " '''Por ejemplo, retorna para cada propiedad, cuantas hay mas de su tipo con x banos ej.'''\n", + " '''cantidad_propiedad_misma_cantidad(df, 'banos', 'tipodepropiedad'), para cada propiedad, se fija\n", + " cuantas mas de su tipo con tantos banos hay'''\n", + " '''Retorna el df con la columna agregada'''\n", + " df_aux = df.copy()\n", + " df_aux = df_aux.groupby([agrupar_col, misma_cantidad_col]).agg({'id': 'count'})\n", + " df_aux = df_aux.rename(columns={'id':'cantidad'})\n", + " df_aux = df_aux.reset_index()\n", + " \n", + " def get_cantidad(col1, col2):\n", + " cantidad = df_aux[(df_aux[agrupar_col] == col1) & (df_aux[misma_cantidad_col] == col2)]['cantidad']\n", + " return cantidad.values[0] if len(cantidad.values > 0) else 
0\n", + " \n", + " df['cantidad_' + agrupar_col + '_' + misma_cantidad_col] = df.apply(lambda x: get_cantidad(x[agrupar_col], x[misma_cantidad_col]), axis=1)\n", + " return df" + ] + }, + { + "cell_type": "code", + "execution_count": 89, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/tomas/Facultad/datos/datos-tp2/.venv/lib/python3.6/site-packages/ipykernel_launcher.py:15: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " from ipykernel import kernelapp as app\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idtitulodescripciontipodepropiedaddireccionciudadprovinciaantiguedadhabitacionesgarages...latlngfechagimnasiousosmultiplespiscinaescuelascercanascentroscomercialescercanospreciocantidad_tipodepropiedad_provincia
0254099depto. tipo a-402depto. interior de 80.15m2, consta de sala com...ApartamentoAvenida Division del Norte 2005Benito JuárezDistrito FederalNaN2.01.0...NaNNaN2015-08-23 00:00:000.00.00.00.00.02273000.03
153461condominio horizontal en venta<p>entre sonora y guerrero, atr&aacute;s del h...Casa en condominioAV. MEXICOLa Magdalena ContrerasDistrito Federal10.03.02.0...19.310205-99.2276552013-06-28 00:00:000.00.00.01.01.03600000.02
2247984casa en venta urbi 3 recamaras tonaladescripcion \\nla mejor ubicacion residencial e...CasaUrbi TonalaTonaláJalisco5.03.02.0...NaNNaN2015-10-17 00:00:000.00.00.00.00.01200000.02
3209067casa sola en toluca zinacantepec con credito i...casa en privada con caseta de vigilancia casas...CasaIGNACIO MANUEL ALTAMIRANO 128ZinacantepecEdo. de México1.02.01.0...19.301890-99.6880152012-03-09 00:00:000.00.00.01.01.0650000.02
4185997paseos del solbonito departamento en excelentes condiciones ...ApartamentoPASEOS DEL SOLZapopanJalisco10.02.01.0...NaNNaN2016-06-07 00:00:000.00.00.00.00.01150000.01
5126147departamento en venta taxqueñaamplio departamento, estancia de sala y comedo...ApartamentoCondominio Tlalpan 2BCoyoacánDistrito Federal5.02.01.0...19.300612-99.1484752014-03-18 00:00:000.00.00.00.01.01100000.03
6139233de oportunidad casa en san lorenzoubicada en esquina, pertenece san lorenzo agen...CasaNaNOaxaca de JuárezOaxacaNaN3.01.0...17.143504-96.8035042016-02-23 00:00:000.00.00.00.00.01150000.01
75013casa emilia en venta en selvamar playa del carmencasa emilia en venta playa del carmenfracciona...Casacondominio el trebolPlaya del CarmenQuintana Roo2.04.02.0...20.672612-87.0379682016-10-20 00:00:000.00.00.00.00.04200000.01
844962pre- venta preciosos depas 2 recamaras con sub...<p>pre-venta de preciosos departamento ecologi...ApartamentoBUENAVISTA DEPTOS CON SUBSIDIOVilla de AlvarezColima1.02.01.0...NaNNaN2014-01-06 00:00:000.00.00.01.01.0310000.01
9134537terrenoterreno de 5.500m2 bardeado, uso de suelo h-20...TerrenoAv. MorelosIxtapalucaEdo. de MéxicoNaNNaNNaN...19.316000-98.8870002016-12-22 00:00:000.00.00.00.00.06200000.01
1051180mi 2° credito<p>mi 2&deg; credito<br />para todos aquellos ...CasaMI 2° CREDITOSan Luis PotosíSan luis Potosí0.03.01.0...22.195878-101.0189562013-01-03 00:00:000.00.00.01.01.0488000.01
11103293departamento en venta, san pedro garza garcia,...departamento nuevo ,256 m2 de construccion,un ...ApartamentoNaNSan Pedro Garza GarcíaNuevo LeónNaN3.02.0...NaNNaN2014-12-01 00:00:000.00.00.00.00.07200000.01
1247890hermosa casa en zibata queretaro 170m²<p><strong>casa </strong>en venta en fracciona...CasaPitahayas 2 - 58, ZibataQuerétaroQuerétaro0.03.02.0...20.682878-100.3175032013-08-12 00:00:000.01.00.01.01.01500000.04
13130216casa matias romero - sión bancariaflamante casa habitación en condominio horizon...CasaMATIAS ROMERO, COLONIA DEL VALLEBenito JuárezDistrito Federal20.0NaN0.0...19.374895-99.1495192015-03-31 00:00:000.00.00.00.00.03000000.01
14181436casa en venta en valle realpre venta de residencia en la toscana con exce...CasaNaNZapopanJaliscoNaNNaN2.0...20.729601-103.4319932014-11-25 00:00:000.00.00.00.00.05300000.02
15201923casa en venta lomas verdes naucalpan<p>casa en dos niveles, 3 recámaras la princip...CasaNaNNaucalpan de JuárezEdo. de MéxicoNaN3.02.0...19.516583-99.2560282015-01-13 00:00:000.00.00.00.00.03650000.02
16283945preciosa casa en cumbres del lago \\t<p>clave: vcln2450 fecha de actualizaci&oacute...CasaCUMBRES DEL LAGO JURIQUILLA, QUERETARO, MEXICOQuerétaroQuerétaro0.03.02.0...20.708760-100.4593032013-03-06 00:00:000.00.00.01.01.02450000.04
1773348oportunidad, departamento col del valle, 3 rec...magnifico departamento con excelente distribuc...ApartamentoPazaje Santa CruzBenito JuárezDistrito Federal5.03.02.0...NaNNaN2016-10-30 00:00:000.00.00.01.01.04750000.03
1840421últimos terrenos en preventa!! en bukara coto ...terreno ubicado en bukara coto club, el cual s...TerrenoMilenioQuerétaroQuerétaroNaNNaN0.0...NaNNaN2016-12-03 00:00:000.00.01.01.01.0794078.01
1987655NaN-- rcv151104-lv-27 -- linda casa con area d...CasaSENDA CELESTIAL 10QuerétaroQuerétaro0.03.02.0...NaNNaN2016-01-15 00:00:000.00.00.00.00.01850000.04
2090071terreno residencial en venta en cumbres quinta...<p>excelente terreno plano, frente a parque en...TerrenoTERRENO RESIDENCIAL EN VENTA EN CUMBRESMonterreyNuevo León0.0NaN0.0...NaNNaN2013-12-02 00:00:000.00.00.00.00.0490000.01
21224513casa en venta en querétaronocnok id: mx15-bk0340. hermosa casa en conjun...Casa en condominioNaNQuerétaroQuerétaroNaN3.02.0...NaNNaN2016-02-01 00:00:000.00.00.00.00.02300000.01
22129818comoda casa con recamara en planta baja y ampl...<p>comoda casa con recamara en planta baja y a...CasaNaNCiudad MaderoTamaulipas0.04.0NaN...22.264768-97.8410122013-12-12 00:00:000.00.00.00.00.01340000.01
23146699casa en condominio en venta, benito juarez, ca...casa en venta en cancun en residencial cumbres...Casa en condominioNaNCancúnQuintana Roo4.03.02.0...21.113768-86.8463232015-05-19 00:00:000.00.00.00.00.03500000.01
24235958casa en venta - colinas del cimatario, queréta...<p>propiedad en un nivel, con espacios amplios...CasaColinas del Cimatario C721QuerétaroQuerétaro4.03.0NaN...20.559744-100.3673822013-09-26 00:00:000.00.00.01.01.03900000.04
2597918casa venta santa ceciliahermosa casa en santa cecilia en apodaca, cerc...CasaLopez VelardeApodacaNuevo León8.03.02.0...NaNNaN2016-12-31 00:00:000.00.00.01.01.0850000.01
2670294casa en venta de un nivel , frac. misiones de ...¡excelente oportunidad! \\n\\ncasa de un nivel ...Casa en condominioMISIONES DE SAN FRANCISCOPueblaPuebla3.02.01.0...NaNNaN2014-07-22 00:00:000.00.00.01.01.0310000.01
27177031NaNhermosa casa remodelada, con acabados de lujo,...Casa en condominioPASEO DE LOS LAURELESMiguel HidalgoDistrito Federal18.03.03.0...NaNNaN2012-03-07 00:00:000.00.00.00.00.010800000.02
28144635casa en venta en santillana icasa en coto 3 recamaras, estudio, sala-comedo...Casa en condominioCoto Santillana I Casa G11ZapopanJalisco1.03.02.0...20.714140-103.4538082015-01-20 00:00:000.00.00.00.00.02600000.01
29146158terreno urbano bardeado zona tesistánen zona de urbanización progresiva, terreno pl...Terreno comercialcamino viejo a TesistanZapopanJalisco20.0NaN0.0...20.784515-103.4770622016-12-03 00:00:000.00.00.01.00.010707900.01
\n", + "

30 rows × 24 columns

\n", + "
" + ], + "text/plain": [ + " id titulo \\\n", + "0 254099 depto. tipo a-402 \n", + "1 53461 condominio horizontal en venta \n", + "2 247984 casa en venta urbi 3 recamaras tonala \n", + "3 209067 casa sola en toluca zinacantepec con credito i... \n", + "4 185997 paseos del sol \n", + "5 126147 departamento en venta taxqueña \n", + "6 139233 de oportunidad casa en san lorenzo \n", + "7 5013 casa emilia en venta en selvamar playa del carmen \n", + "8 44962 pre- venta preciosos depas 2 recamaras con sub... \n", + "9 134537 terreno \n", + "10 51180 mi 2° credito \n", + "11 103293 departamento en venta, san pedro garza garcia,... \n", + "12 47890 hermosa casa en zibata queretaro 170m² \n", + "13 130216 casa matias romero - sión bancaria \n", + "14 181436 casa en venta en valle real \n", + "15 201923 casa en venta lomas verdes naucalpan \n", + "16 283945 preciosa casa en cumbres del lago \\t \n", + "17 73348 oportunidad, departamento col del valle, 3 rec... \n", + "18 40421 últimos terrenos en preventa!! en bukara coto ... \n", + "19 87655 NaN \n", + "20 90071 terreno residencial en venta en cumbres quinta... \n", + "21 224513 casa en venta en querétaro \n", + "22 129818 comoda casa con recamara en planta baja y ampl... \n", + "23 146699 casa en condominio en venta, benito juarez, ca... \n", + "24 235958 casa en venta - colinas del cimatario, queréta... \n", + "25 97918 casa venta santa cecilia \n", + "26 70294 casa en venta de un nivel , frac. misiones de ... \n", + "27 177031 NaN \n", + "28 144635 casa en venta en santillana i \n", + "29 146158 terreno urbano bardeado zona tesistán \n", + "\n", + " descripcion tipodepropiedad \\\n", + "0 depto. interior de 80.15m2, consta de sala com... Apartamento \n", + "1

entre sonora y guerrero, atrás del h... Casa en condominio \n", + "2 descripcion \\nla mejor ubicacion residencial e... Casa \n", + "3 casa en privada con caseta de vigilancia casas... Casa \n", + "4 bonito departamento en excelentes condiciones ... Apartamento \n", + "5 amplio departamento, estancia de sala y comedo... Apartamento \n", + "6 ubicada en esquina, pertenece san lorenzo agen... Casa \n", + "7 casa emilia en venta playa del carmenfracciona... Casa \n", + "8

pre-venta de preciosos departamento ecologi... Apartamento \n", + "9 terreno de 5.500m2 bardeado, uso de suelo h-20... Terreno \n", + "10

mi 2° credito
para todos aquellos ... Casa \n", + "11 departamento nuevo ,256 m2 de construccion,un ... Apartamento \n", + "12

casa en venta en fracciona... Casa \n", + "13 flamante casa habitación en condominio horizon... Casa \n", + "14 pre venta de residencia en la toscana con exce... Casa \n", + "15

casa en dos niveles, 3 recámaras la princip... Casa \n", + "16

clave: vcln2450 fecha de actualizació... Casa \n", + "17 magnifico departamento con excelente distribuc... Apartamento \n", + "18 terreno ubicado en bukara coto club, el cual s... Terreno \n", + "19 -- rcv151104-lv-27 -- linda casa con area d... Casa \n", + "20

excelente terreno plano, frente a parque en... Terreno \n", + "21 nocnok id: mx15-bk0340. hermosa casa en conjun... Casa en condominio \n", + "22

comoda casa con recamara en planta baja y a... Casa \n", + "23 casa en venta en cancun en residencial cumbres... Casa en condominio \n", + "24

propiedad en un nivel, con espacios amplios... Casa \n", + "25 hermosa casa en santa cecilia en apodaca, cerc... Casa \n", + "26 ¡excelente oportunidad! \\n\\ncasa de un nivel ... Casa en condominio \n", + "27 hermosa casa remodelada, con acabados de lujo,... Casa en condominio \n", + "28 casa en coto 3 recamaras, estudio, sala-comedo... Casa en condominio \n", + "29 en zona de urbanización progresiva, terreno pl... Terreno comercial \n", + "\n", + " direccion ciudad \\\n", + "0 Avenida Division del Norte 2005 Benito Juárez \n", + "1 AV. MEXICO La Magdalena Contreras \n", + "2 Urbi Tonala Tonalá \n", + "3 IGNACIO MANUEL ALTAMIRANO 128 Zinacantepec \n", + "4 PASEOS DEL SOL Zapopan \n", + "5 Condominio Tlalpan 2B Coyoacán \n", + "6 NaN Oaxaca de Juárez \n", + "7 condominio el trebol Playa del Carmen \n", + "8 BUENAVISTA DEPTOS CON SUBSIDIO Villa de Alvarez \n", + "9 Av. Morelos Ixtapaluca \n", + "10 MI 2° CREDITO San Luis Potosí \n", + "11 NaN San Pedro Garza García \n", + "12 Pitahayas 2 - 58, Zibata Querétaro \n", + "13 MATIAS ROMERO, COLONIA DEL VALLE Benito Juárez \n", + "14 NaN Zapopan \n", + "15 NaN Naucalpan de Juárez \n", + "16 CUMBRES DEL LAGO JURIQUILLA, QUERETARO, MEXICO Querétaro \n", + "17 Pazaje Santa Cruz Benito Juárez \n", + "18 Milenio Querétaro \n", + "19 SENDA CELESTIAL 10 Querétaro \n", + "20 TERRENO RESIDENCIAL EN VENTA EN CUMBRES Monterrey \n", + "21 NaN Querétaro \n", + "22 NaN Ciudad Madero \n", + "23 NaN Cancún \n", + "24 Colinas del Cimatario C721 Querétaro \n", + "25 Lopez Velarde Apodaca \n", + "26 MISIONES DE SAN FRANCISCO Puebla \n", + "27 PASEO DE LOS LAURELES Miguel Hidalgo \n", + "28 Coto Santillana I Casa G11 Zapopan \n", + "29 camino viejo a Tesistan Zapopan \n", + "\n", + " provincia antiguedad habitaciones garages ... lat \\\n", + "0 Distrito Federal NaN 2.0 1.0 ... NaN \n", + "1 Distrito Federal 10.0 3.0 2.0 ... 19.310205 \n", + "2 Jalisco 5.0 3.0 2.0 ... NaN \n", + "3 Edo. de México 1.0 2.0 1.0 ... 
19.301890 \n", + "4 Jalisco 10.0 2.0 1.0 ... NaN \n", + "5 Distrito Federal 5.0 2.0 1.0 ... 19.300612 \n", + "6 Oaxaca NaN 3.0 1.0 ... 17.143504 \n", + "7 Quintana Roo 2.0 4.0 2.0 ... 20.672612 \n", + "8 Colima 1.0 2.0 1.0 ... NaN \n", + "9 Edo. de México NaN NaN NaN ... 19.316000 \n", + "10 San luis Potosí 0.0 3.0 1.0 ... 22.195878 \n", + "11 Nuevo León NaN 3.0 2.0 ... NaN \n", + "12 Querétaro 0.0 3.0 2.0 ... 20.682878 \n", + "13 Distrito Federal 20.0 NaN 0.0 ... 19.374895 \n", + "14 Jalisco NaN NaN 2.0 ... 20.729601 \n", + "15 Edo. de México NaN 3.0 2.0 ... 19.516583 \n", + "16 Querétaro 0.0 3.0 2.0 ... 20.708760 \n", + "17 Distrito Federal 5.0 3.0 2.0 ... NaN \n", + "18 Querétaro NaN NaN 0.0 ... NaN \n", + "19 Querétaro 0.0 3.0 2.0 ... NaN \n", + "20 Nuevo León 0.0 NaN 0.0 ... NaN \n", + "21 Querétaro NaN 3.0 2.0 ... NaN \n", + "22 Tamaulipas 0.0 4.0 NaN ... 22.264768 \n", + "23 Quintana Roo 4.0 3.0 2.0 ... 21.113768 \n", + "24 Querétaro 4.0 3.0 NaN ... 20.559744 \n", + "25 Nuevo León 8.0 3.0 2.0 ... NaN \n", + "26 Puebla 3.0 2.0 1.0 ... NaN \n", + "27 Distrito Federal 18.0 3.0 3.0 ... NaN \n", + "28 Jalisco 1.0 3.0 2.0 ... 20.714140 \n", + "29 Jalisco 20.0 NaN 0.0 ... 
20.784515 \n", + "\n", + " lng fecha gimnasio usosmultiples piscina \\\n", + "0 NaN 2015-08-23 00:00:00 0.0 0.0 0.0 \n", + "1 -99.227655 2013-06-28 00:00:00 0.0 0.0 0.0 \n", + "2 NaN 2015-10-17 00:00:00 0.0 0.0 0.0 \n", + "3 -99.688015 2012-03-09 00:00:00 0.0 0.0 0.0 \n", + "4 NaN 2016-06-07 00:00:00 0.0 0.0 0.0 \n", + "5 -99.148475 2014-03-18 00:00:00 0.0 0.0 0.0 \n", + "6 -96.803504 2016-02-23 00:00:00 0.0 0.0 0.0 \n", + "7 -87.037968 2016-10-20 00:00:00 0.0 0.0 0.0 \n", + "8 NaN 2014-01-06 00:00:00 0.0 0.0 0.0 \n", + "9 -98.887000 2016-12-22 00:00:00 0.0 0.0 0.0 \n", + "10 -101.018956 2013-01-03 00:00:00 0.0 0.0 0.0 \n", + "11 NaN 2014-12-01 00:00:00 0.0 0.0 0.0 \n", + "12 -100.317503 2013-08-12 00:00:00 0.0 1.0 0.0 \n", + "13 -99.149519 2015-03-31 00:00:00 0.0 0.0 0.0 \n", + "14 -103.431993 2014-11-25 00:00:00 0.0 0.0 0.0 \n", + "15 -99.256028 2015-01-13 00:00:00 0.0 0.0 0.0 \n", + "16 -100.459303 2013-03-06 00:00:00 0.0 0.0 0.0 \n", + "17 NaN 2016-10-30 00:00:00 0.0 0.0 0.0 \n", + "18 NaN 2016-12-03 00:00:00 0.0 0.0 1.0 \n", + "19 NaN 2016-01-15 00:00:00 0.0 0.0 0.0 \n", + "20 NaN 2013-12-02 00:00:00 0.0 0.0 0.0 \n", + "21 NaN 2016-02-01 00:00:00 0.0 0.0 0.0 \n", + "22 -97.841012 2013-12-12 00:00:00 0.0 0.0 0.0 \n", + "23 -86.846323 2015-05-19 00:00:00 0.0 0.0 0.0 \n", + "24 -100.367382 2013-09-26 00:00:00 0.0 0.0 0.0 \n", + "25 NaN 2016-12-31 00:00:00 0.0 0.0 0.0 \n", + "26 NaN 2014-07-22 00:00:00 0.0 0.0 0.0 \n", + "27 NaN 2012-03-07 00:00:00 0.0 0.0 0.0 \n", + "28 -103.453808 2015-01-20 00:00:00 0.0 0.0 0.0 \n", + "29 -103.477062 2016-12-03 00:00:00 0.0 0.0 0.0 \n", + "\n", + " escuelascercanas centroscomercialescercanos precio \\\n", + "0 0.0 0.0 2273000.0 \n", + "1 1.0 1.0 3600000.0 \n", + "2 0.0 0.0 1200000.0 \n", + "3 1.0 1.0 650000.0 \n", + "4 0.0 0.0 1150000.0 \n", + "5 0.0 1.0 1100000.0 \n", + "6 0.0 0.0 1150000.0 \n", + "7 0.0 0.0 4200000.0 \n", + "8 1.0 1.0 310000.0 \n", + "9 0.0 0.0 6200000.0 \n", + "10 1.0 1.0 488000.0 \n", + "11 0.0 0.0 
7200000.0 \n", + "12 1.0 1.0 1500000.0 \n", + "13 0.0 0.0 3000000.0 \n", + "14 0.0 0.0 5300000.0 \n", + "15 0.0 0.0 3650000.0 \n", + "16 1.0 1.0 2450000.0 \n", + "17 1.0 1.0 4750000.0 \n", + "18 1.0 1.0 794078.0 \n", + "19 0.0 0.0 1850000.0 \n", + "20 0.0 0.0 490000.0 \n", + "21 0.0 0.0 2300000.0 \n", + "22 0.0 0.0 1340000.0 \n", + "23 0.0 0.0 3500000.0 \n", + "24 1.0 1.0 3900000.0 \n", + "25 1.0 1.0 850000.0 \n", + "26 1.0 1.0 310000.0 \n", + "27 0.0 0.0 10800000.0 \n", + "28 0.0 0.0 2600000.0 \n", + "29 1.0 0.0 10707900.0 \n", + "\n", + " cantidad_tipodepropiedad_provincia \n", + "0 3 \n", + "1 2 \n", + "2 2 \n", + "3 2 \n", + "4 1 \n", + "5 3 \n", + "6 1 \n", + "7 1 \n", + "8 1 \n", + "9 1 \n", + "10 1 \n", + "11 1 \n", + "12 4 \n", + "13 1 \n", + "14 2 \n", + "15 2 \n", + "16 4 \n", + "17 3 \n", + "18 1 \n", + "19 4 \n", + "20 1 \n", + "21 1 \n", + "22 1 \n", + "23 1 \n", + "24 4 \n", + "25 1 \n", + "26 1 \n", + "27 2 \n", + "28 1 \n", + "29 1 \n", + "\n", + "[30 rows x 24 columns]" + ] + }, + "execution_count": 89, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = df_train.head(30)\n", + "# df = df_train\n", + "\n", + "# concatenar_categoricos(df, 'tipodepropiedad', 'provincia')\n", + "cantidad_propiedad_misma_cantidad(df, 'tipodepropiedad', 'provincia')" + ] } ], "metadata": { diff --git a/html/features_complementarias.html b/html/features_complementarias.html index aa4949c..916dd18 100644 --- a/html/features_complementarias.html +++ b/html/features_complementarias.html @@ -13076,7 +13076,7 @@

-
In [29]:
+
In [3]:
import pandas as pd
@@ -13092,7 +13092,7 @@
 
-
In [31]:
+
In [2]:
def concatenar_categoricos(df, columna1, columna2, N=0, df_aux=None, devolver_cols=False):
@@ -13115,6 +13115,852 @@
 
+
+
+
+
In [88]:
+
+
+
def cantidad_propiedad_misma_cantidad(df, agrupar_col, misma_cantidad_col):
+    '''Cantidad de casas con la misma cantidad de banos que la actual'''
+    '''Por ejemplo, retorna para cada propiedad, cuantas hay mas de su tipo con x banos ej.'''
+    '''cantidad_propiedad_misma_cantidad(df, 'banos', 'tipodepropiedad'), para cada propiedad, se fija
+    cuantas mas de su tipo con tantos banos hay'''
+    '''Retorna el df con la columna agregada'''
+    df_aux = df.copy()
+    df_aux = df_aux.groupby([agrupar_col, misma_cantidad_col]).agg({'id': 'count'})
+    df_aux = df_aux.rename(columns={'id':'cantidad'})
+    df_aux = df_aux.reset_index()
+    
+    def get_cantidad(col1, col2):
+        cantidad = df_aux[(df_aux[agrupar_col] == col1) & (df_aux[misma_cantidad_col] == col2)]['cantidad']
+        return cantidad.values[0] if len(cantidad.values > 0) else 0
+        
+    df['cantidad_' + agrupar_col + '_' + misma_cantidad_col] = df.apply(lambda x: get_cantidad(x[agrupar_col], x[misma_cantidad_col]), axis=1)
+    return df
+
+ +
+
+
+ +
+
+
+
In [89]:
+
+
+
df = df_train.head(30)
+# df = df_train
+
+# concatenar_categoricos(df, 'tipodepropiedad', 'provincia')
+cantidad_propiedad_misma_cantidad(df, 'tipodepropiedad', 'provincia')
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
/home/tomas/Facultad/datos/datos-tp2/.venv/lib/python3.6/site-packages/ipykernel_launcher.py:15: SettingWithCopyWarning: 
+A value is trying to be set on a copy of a slice from a DataFrame.
+Try using .loc[row_indexer,col_indexer] = value instead
+
+See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
+  from ipykernel import kernelapp as app
+
+
+
+ +
+ +
Out[89]:
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
idtitulodescripciontipodepropiedaddireccionciudadprovinciaantiguedadhabitacionesgarages...latlngfechagimnasiousosmultiplespiscinaescuelascercanascentroscomercialescercanospreciocantidad_tipodepropiedad_provincia
0254099depto. tipo a-402depto. interior de 80.15m2, consta de sala com...ApartamentoAvenida Division del Norte 2005Benito JuárezDistrito FederalNaN2.01.0...NaNNaN2015-08-23 00:00:000.00.00.00.00.02273000.03
153461condominio horizontal en venta<p>entre sonora y guerrero, atr&aacute;s del h...Casa en condominioAV. MEXICOLa Magdalena ContrerasDistrito Federal10.03.02.0...19.310205-99.2276552013-06-28 00:00:000.00.00.01.01.03600000.02
2247984casa en venta urbi 3 recamaras tonaladescripcion \nla mejor ubicacion residencial e...CasaUrbi TonalaTonaláJalisco5.03.02.0...NaNNaN2015-10-17 00:00:000.00.00.00.00.01200000.02
3209067casa sola en toluca zinacantepec con credito i...casa en privada con caseta de vigilancia casas...CasaIGNACIO MANUEL ALTAMIRANO 128ZinacantepecEdo. de México1.02.01.0...19.301890-99.6880152012-03-09 00:00:000.00.00.01.01.0650000.02
4185997paseos del solbonito departamento en excelentes condiciones ...ApartamentoPASEOS DEL SOLZapopanJalisco10.02.01.0...NaNNaN2016-06-07 00:00:000.00.00.00.00.01150000.01
5126147departamento en venta taxqueñaamplio departamento, estancia de sala y comedo...ApartamentoCondominio Tlalpan 2BCoyoacánDistrito Federal5.02.01.0...19.300612-99.1484752014-03-18 00:00:000.00.00.00.01.01100000.03
6139233de oportunidad casa en san lorenzoubicada en esquina, pertenece san lorenzo agen...CasaNaNOaxaca de JuárezOaxacaNaN3.01.0...17.143504-96.8035042016-02-23 00:00:000.00.00.00.00.01150000.01
75013casa emilia en venta en selvamar playa del carmencasa emilia en venta playa del carmenfracciona...Casacondominio el trebolPlaya del CarmenQuintana Roo2.04.02.0...20.672612-87.0379682016-10-20 00:00:000.00.00.00.00.04200000.01
844962pre- venta preciosos depas 2 recamaras con sub...<p>pre-venta de preciosos departamento ecologi...ApartamentoBUENAVISTA DEPTOS CON SUBSIDIOVilla de AlvarezColima1.02.01.0...NaNNaN2014-01-06 00:00:000.00.00.01.01.0310000.01
9134537terrenoterreno de 5.500m2 bardeado, uso de suelo h-20...TerrenoAv. MorelosIxtapalucaEdo. de MéxicoNaNNaNNaN...19.316000-98.8870002016-12-22 00:00:000.00.00.00.00.06200000.01
1051180mi 2° credito<p>mi 2&deg; credito<br />para todos aquellos ...CasaMI 2° CREDITOSan Luis PotosíSan luis Potosí0.03.01.0...22.195878-101.0189562013-01-03 00:00:000.00.00.01.01.0488000.01
11103293departamento en venta, san pedro garza garcia,...departamento nuevo ,256 m2 de construccion,un ...ApartamentoNaNSan Pedro Garza GarcíaNuevo LeónNaN3.02.0...NaNNaN2014-12-01 00:00:000.00.00.00.00.07200000.01
1247890hermosa casa en zibata queretaro 170m²<p><strong>casa </strong>en venta en fracciona...CasaPitahayas 2 - 58, ZibataQuerétaroQuerétaro0.03.02.0...20.682878-100.3175032013-08-12 00:00:000.01.00.01.01.01500000.04
13130216casa matias romero - sión bancariaflamante casa habitación en condominio horizon...CasaMATIAS ROMERO, COLONIA DEL VALLEBenito JuárezDistrito Federal20.0NaN0.0...19.374895-99.1495192015-03-31 00:00:000.00.00.00.00.03000000.01
14181436casa en venta en valle realpre venta de residencia en la toscana con exce...CasaNaNZapopanJaliscoNaNNaN2.0...20.729601-103.4319932014-11-25 00:00:000.00.00.00.00.05300000.02
15201923casa en venta lomas verdes naucalpan<p>casa en dos niveles, 3 recámaras la princip...CasaNaNNaucalpan de JuárezEdo. de MéxicoNaN3.02.0...19.516583-99.2560282015-01-13 00:00:000.00.00.00.00.03650000.02
16283945preciosa casa en cumbres del lago \t<p>clave: vcln2450 fecha de actualizaci&oacute...CasaCUMBRES DEL LAGO JURIQUILLA, QUERETARO, MEXICOQuerétaroQuerétaro0.03.02.0...20.708760-100.4593032013-03-06 00:00:000.00.00.01.01.02450000.04
1773348oportunidad, departamento col del valle, 3 rec...magnifico departamento con excelente distribuc...ApartamentoPazaje Santa CruzBenito JuárezDistrito Federal5.03.02.0...NaNNaN2016-10-30 00:00:000.00.00.01.01.04750000.03
1840421últimos terrenos en preventa!! en bukara coto ...terreno ubicado en bukara coto club, el cual s...TerrenoMilenioQuerétaroQuerétaroNaNNaN0.0...NaNNaN2016-12-03 00:00:000.00.01.01.01.0794078.01
1987655NaN-- rcv151104-lv-27 -- linda casa con area d...CasaSENDA CELESTIAL 10QuerétaroQuerétaro0.03.02.0...NaNNaN2016-01-15 00:00:000.00.00.00.00.01850000.04
2090071terreno residencial en venta en cumbres quinta...<p>excelente terreno plano, frente a parque en...TerrenoTERRENO RESIDENCIAL EN VENTA EN CUMBRESMonterreyNuevo León0.0NaN0.0...NaNNaN2013-12-02 00:00:000.00.00.00.00.0490000.01
21224513casa en venta en querétaronocnok id: mx15-bk0340. hermosa casa en conjun...Casa en condominioNaNQuerétaroQuerétaroNaN3.02.0...NaNNaN2016-02-01 00:00:000.00.00.00.00.02300000.01
22129818comoda casa con recamara en planta baja y ampl...<p>comoda casa con recamara en planta baja y a...CasaNaNCiudad MaderoTamaulipas0.04.0NaN...22.264768-97.8410122013-12-12 00:00:000.00.00.00.00.01340000.01
23146699casa en condominio en venta, benito juarez, ca...casa en venta en cancun en residencial cumbres...Casa en condominioNaNCancúnQuintana Roo4.03.02.0...21.113768-86.8463232015-05-19 00:00:000.00.00.00.00.03500000.01
24235958casa en venta - colinas del cimatario, queréta...<p>propiedad en un nivel, con espacios amplios...CasaColinas del Cimatario C721QuerétaroQuerétaro4.03.0NaN...20.559744-100.3673822013-09-26 00:00:000.00.00.01.01.03900000.04
2597918casa venta santa ceciliahermosa casa en santa cecilia en apodaca, cerc...CasaLopez VelardeApodacaNuevo León8.03.02.0...NaNNaN2016-12-31 00:00:000.00.00.01.01.0850000.01
2670294casa en venta de un nivel , frac. misiones de ...¡excelente oportunidad! \n\ncasa de un nivel ...Casa en condominioMISIONES DE SAN FRANCISCOPueblaPuebla3.02.01.0...NaNNaN2014-07-22 00:00:000.00.00.01.01.0310000.01
27177031NaNhermosa casa remodelada, con acabados de lujo,...Casa en condominioPASEO DE LOS LAURELESMiguel HidalgoDistrito Federal18.03.03.0...NaNNaN2012-03-07 00:00:000.00.00.00.00.010800000.02
28144635casa en venta en santillana icasa en coto 3 recamaras, estudio, sala-comedo...Casa en condominioCoto Santillana I Casa G11ZapopanJalisco1.03.02.0...20.714140-103.4538082015-01-20 00:00:000.00.00.00.00.02600000.01
29146158terreno urbano bardeado zona tesistánen zona de urbanización progresiva, terreno pl...Terreno comercialcamino viejo a TesistanZapopanJalisco20.0NaN0.0...20.784515-103.4770622016-12-03 00:00:000.00.00.01.00.010707900.01
+

30 rows × 24 columns

+
+
+ +
+ +
+
+
From c094bc68ab64d9bea56537fed81f2ccd033a86ec Mon Sep 17 00:00:00 2001 From: tlofano Date: Fri, 22 Nov 2019 21:37:00 -0300 Subject: [PATCH 4/8] cantidad_attr --- features_complementarias.ipynb | 346 +++++++++++++++-------------- html/features_complementarias.html | 100 ++++++--- 2 files changed, 247 insertions(+), 199 deletions(-) diff --git a/features_complementarias.ipynb b/features_complementarias.ipynb index b917d62..82fb898 100644 --- a/features_complementarias.ipynb +++ b/features_complementarias.ipynb @@ -61,7 +61,28 @@ }, { "cell_type": "code", - "execution_count": 89, + "execution_count": 97, + "metadata": {}, + "outputs": [], + "source": [ + "def cantidad_atributo(df, col):\n", + " '''Simil a value_counts. Dada una columna, agrega al df, cuantas propiedades existen con esa misma propiedad'''\n", + " df_aux = df.copy()\n", + " df_aux = df_aux.groupby(['provincia']).agg({'id': 'count'})\n", + " df_aux = df_aux.rename(columns={'id':'cantidad'})\n", + " df_aux = df_aux.reset_index()\n", + "\n", + " def get_cantidad(col1):\n", + " cantidad = df_aux[df_aux[col] == col1]['cantidad']\n", + " return cantidad.values[0] if len(cantidad.values > 0) else 0\n", + " \n", + " df['cantidad_propiedades_en_' + col] = df.apply(lambda x: get_cantidad(x[col]), axis=1)\n", + " return df" + ] + }, + { + "cell_type": "code", + "execution_count": 98, "metadata": { "scrolled": false }, @@ -70,12 +91,12 @@ "name": "stderr", "output_type": "stream", "text": [ - "/home/tomas/Facultad/datos/datos-tp2/.venv/lib/python3.6/site-packages/ipykernel_launcher.py:15: SettingWithCopyWarning: \n", + "/home/tomas/Facultad/datos/datos-tp2/.venv/lib/python3.6/site-packages/ipykernel_launcher.py:12: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: 
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", - " from ipykernel import kernelapp as app\n" + " if sys.path[0] == '':\n" ] }, { @@ -110,7 +131,6 @@ " habitaciones\n", " garages\n", " ...\n", - " lat\n", " lng\n", " fecha\n", " gimnasio\n", @@ -120,6 +140,7 @@ " centroscomercialescercanos\n", " precio\n", " cantidad_tipodepropiedad_provincia\n", + " cantidad_propiedades_en_provincia\n", " \n", " \n", " \n", @@ -137,7 +158,6 @@ " 1.0\n", " ...\n", " NaN\n", - " NaN\n", " 2015-08-23 00:00:00\n", " 0.0\n", " 0.0\n", @@ -146,6 +166,7 @@ " 0.0\n", " 2273000.0\n", " 3\n", + " 6\n", " \n", " \n", " 1\n", @@ -160,7 +181,6 @@ " 3.0\n", " 2.0\n", " ...\n", - " 19.310205\n", " -99.227655\n", " 2013-06-28 00:00:00\n", " 0.0\n", @@ -170,6 +190,7 @@ " 1.0\n", " 3600000.0\n", " 2\n", + " 6\n", " \n", " \n", " 2\n", @@ -185,7 +206,6 @@ " 2.0\n", " ...\n", " NaN\n", - " NaN\n", " 2015-10-17 00:00:00\n", " 0.0\n", " 0.0\n", @@ -194,6 +214,7 @@ " 0.0\n", " 1200000.0\n", " 2\n", + " 5\n", " \n", " \n", " 3\n", @@ -208,7 +229,6 @@ " 2.0\n", " 1.0\n", " ...\n", - " 19.301890\n", " -99.688015\n", " 2012-03-09 00:00:00\n", " 0.0\n", @@ -218,6 +238,7 @@ " 1.0\n", " 650000.0\n", " 2\n", + " 3\n", " \n", " \n", " 4\n", @@ -233,7 +254,6 @@ " 1.0\n", " ...\n", " NaN\n", - " NaN\n", " 2016-06-07 00:00:00\n", " 0.0\n", " 0.0\n", @@ -242,6 +262,7 @@ " 0.0\n", " 1150000.0\n", " 1\n", + " 5\n", " \n", " \n", " 5\n", @@ -256,7 +277,6 @@ " 2.0\n", " 1.0\n", " ...\n", - " 19.300612\n", " -99.148475\n", " 2014-03-18 00:00:00\n", " 0.0\n", @@ -266,6 +286,7 @@ " 1.0\n", " 1100000.0\n", " 3\n", + " 6\n", " \n", " \n", " 6\n", @@ -280,7 +301,6 @@ " 3.0\n", " 1.0\n", " ...\n", - " 17.143504\n", " -96.803504\n", " 2016-02-23 00:00:00\n", " 0.0\n", @@ -290,6 +310,7 @@ " 0.0\n", " 1150000.0\n", " 1\n", + " 1\n", " \n", " \n", " 7\n", @@ -304,7 +325,6 @@ " 4.0\n", " 2.0\n", " ...\n", - " 20.672612\n", " -87.037968\n", " 2016-10-20 00:00:00\n", " 0.0\n", 
@@ -314,6 +334,7 @@ " 0.0\n", " 4200000.0\n", " 1\n", + " 2\n", " \n", " \n", " 8\n", @@ -329,7 +350,6 @@ " 1.0\n", " ...\n", " NaN\n", - " NaN\n", " 2014-01-06 00:00:00\n", " 0.0\n", " 0.0\n", @@ -338,6 +358,7 @@ " 1.0\n", " 310000.0\n", " 1\n", + " 1\n", " \n", " \n", " 9\n", @@ -352,7 +373,6 @@ " NaN\n", " NaN\n", " ...\n", - " 19.316000\n", " -98.887000\n", " 2016-12-22 00:00:00\n", " 0.0\n", @@ -362,6 +382,7 @@ " 0.0\n", " 6200000.0\n", " 1\n", + " 3\n", " \n", " \n", " 10\n", @@ -376,7 +397,6 @@ " 3.0\n", " 1.0\n", " ...\n", - " 22.195878\n", " -101.018956\n", " 2013-01-03 00:00:00\n", " 0.0\n", @@ -386,6 +406,7 @@ " 1.0\n", " 488000.0\n", " 1\n", + " 1\n", " \n", " \n", " 11\n", @@ -401,7 +422,6 @@ " 2.0\n", " ...\n", " NaN\n", - " NaN\n", " 2014-12-01 00:00:00\n", " 0.0\n", " 0.0\n", @@ -410,6 +430,7 @@ " 0.0\n", " 7200000.0\n", " 1\n", + " 3\n", " \n", " \n", " 12\n", @@ -424,7 +445,6 @@ " 3.0\n", " 2.0\n", " ...\n", - " 20.682878\n", " -100.317503\n", " 2013-08-12 00:00:00\n", " 0.0\n", @@ -434,6 +454,7 @@ " 1.0\n", " 1500000.0\n", " 4\n", + " 6\n", " \n", " \n", " 13\n", @@ -448,7 +469,6 @@ " NaN\n", " 0.0\n", " ...\n", - " 19.374895\n", " -99.149519\n", " 2015-03-31 00:00:00\n", " 0.0\n", @@ -458,6 +478,7 @@ " 0.0\n", " 3000000.0\n", " 1\n", + " 6\n", " \n", " \n", " 14\n", @@ -472,7 +493,6 @@ " NaN\n", " 2.0\n", " ...\n", - " 20.729601\n", " -103.431993\n", " 2014-11-25 00:00:00\n", " 0.0\n", @@ -482,6 +502,7 @@ " 0.0\n", " 5300000.0\n", " 2\n", + " 5\n", " \n", " \n", " 15\n", @@ -496,7 +517,6 @@ " 3.0\n", " 2.0\n", " ...\n", - " 19.516583\n", " -99.256028\n", " 2015-01-13 00:00:00\n", " 0.0\n", @@ -506,6 +526,7 @@ " 0.0\n", " 3650000.0\n", " 2\n", + " 3\n", " \n", " \n", " 16\n", @@ -520,7 +541,6 @@ " 3.0\n", " 2.0\n", " ...\n", - " 20.708760\n", " -100.459303\n", " 2013-03-06 00:00:00\n", " 0.0\n", @@ -530,6 +550,7 @@ " 1.0\n", " 2450000.0\n", " 4\n", + " 6\n", " \n", " \n", " 17\n", @@ -545,7 +566,6 @@ " 2.0\n", " ...\n", " NaN\n", - " NaN\n", " 
2016-10-30 00:00:00\n", " 0.0\n", " 0.0\n", @@ -554,6 +574,7 @@ " 1.0\n", " 4750000.0\n", " 3\n", + " 6\n", " \n", " \n", " 18\n", @@ -569,7 +590,6 @@ " 0.0\n", " ...\n", " NaN\n", - " NaN\n", " 2016-12-03 00:00:00\n", " 0.0\n", " 0.0\n", @@ -578,6 +598,7 @@ " 1.0\n", " 794078.0\n", " 1\n", + " 6\n", " \n", " \n", " 19\n", @@ -593,7 +614,6 @@ " 2.0\n", " ...\n", " NaN\n", - " NaN\n", " 2016-01-15 00:00:00\n", " 0.0\n", " 0.0\n", @@ -602,6 +622,7 @@ " 0.0\n", " 1850000.0\n", " 4\n", + " 6\n", " \n", " \n", " 20\n", @@ -617,7 +638,6 @@ " 0.0\n", " ...\n", " NaN\n", - " NaN\n", " 2013-12-02 00:00:00\n", " 0.0\n", " 0.0\n", @@ -626,6 +646,7 @@ " 0.0\n", " 490000.0\n", " 1\n", + " 3\n", " \n", " \n", " 21\n", @@ -641,7 +662,6 @@ " 2.0\n", " ...\n", " NaN\n", - " NaN\n", " 2016-02-01 00:00:00\n", " 0.0\n", " 0.0\n", @@ -650,6 +670,7 @@ " 0.0\n", " 2300000.0\n", " 1\n", + " 6\n", " \n", " \n", " 22\n", @@ -664,7 +685,6 @@ " 4.0\n", " NaN\n", " ...\n", - " 22.264768\n", " -97.841012\n", " 2013-12-12 00:00:00\n", " 0.0\n", @@ -674,6 +694,7 @@ " 0.0\n", " 1340000.0\n", " 1\n", + " 1\n", " \n", " \n", " 23\n", @@ -688,7 +709,6 @@ " 3.0\n", " 2.0\n", " ...\n", - " 21.113768\n", " -86.846323\n", " 2015-05-19 00:00:00\n", " 0.0\n", @@ -698,6 +718,7 @@ " 0.0\n", " 3500000.0\n", " 1\n", + " 2\n", " \n", " \n", " 24\n", @@ -712,7 +733,6 @@ " 3.0\n", " NaN\n", " ...\n", - " 20.559744\n", " -100.367382\n", " 2013-09-26 00:00:00\n", " 0.0\n", @@ -722,6 +742,7 @@ " 1.0\n", " 3900000.0\n", " 4\n", + " 6\n", " \n", " \n", " 25\n", @@ -737,7 +758,6 @@ " 2.0\n", " ...\n", " NaN\n", - " NaN\n", " 2016-12-31 00:00:00\n", " 0.0\n", " 0.0\n", @@ -746,6 +766,7 @@ " 1.0\n", " 850000.0\n", " 1\n", + " 3\n", " \n", " \n", " 26\n", @@ -761,7 +782,6 @@ " 1.0\n", " ...\n", " NaN\n", - " NaN\n", " 2014-07-22 00:00:00\n", " 0.0\n", " 0.0\n", @@ -770,6 +790,7 @@ " 1.0\n", " 310000.0\n", " 1\n", + " 1\n", " \n", " \n", " 27\n", @@ -785,7 +806,6 @@ " 3.0\n", " ...\n", " NaN\n", - " NaN\n", " 2012-03-07 
00:00:00\n", " 0.0\n", " 0.0\n", @@ -794,6 +814,7 @@ " 0.0\n", " 10800000.0\n", " 2\n", + " 6\n", " \n", " \n", " 28\n", @@ -808,7 +829,6 @@ " 3.0\n", " 2.0\n", " ...\n", - " 20.714140\n", " -103.453808\n", " 2015-01-20 00:00:00\n", " 0.0\n", @@ -818,6 +838,7 @@ " 0.0\n", " 2600000.0\n", " 1\n", + " 5\n", " \n", " \n", " 29\n", @@ -832,7 +853,6 @@ " NaN\n", " 0.0\n", " ...\n", - " 20.784515\n", " -103.477062\n", " 2016-12-03 00:00:00\n", " 0.0\n", @@ -842,10 +862,11 @@ " 0.0\n", " 10707900.0\n", " 1\n", + " 5\n", " \n", " \n", "\n", - "

30 rows × 24 columns

\n", + "

30 rows × 25 columns

\n", "
" ], "text/plain": [ @@ -945,138 +966,138 @@ "28 Coto Santillana I Casa G11 Zapopan \n", "29 camino viejo a Tesistan Zapopan \n", "\n", - " provincia antiguedad habitaciones garages ... lat \\\n", - "0 Distrito Federal NaN 2.0 1.0 ... NaN \n", - "1 Distrito Federal 10.0 3.0 2.0 ... 19.310205 \n", - "2 Jalisco 5.0 3.0 2.0 ... NaN \n", - "3 Edo. de México 1.0 2.0 1.0 ... 19.301890 \n", - "4 Jalisco 10.0 2.0 1.0 ... NaN \n", - "5 Distrito Federal 5.0 2.0 1.0 ... 19.300612 \n", - "6 Oaxaca NaN 3.0 1.0 ... 17.143504 \n", - "7 Quintana Roo 2.0 4.0 2.0 ... 20.672612 \n", - "8 Colima 1.0 2.0 1.0 ... NaN \n", - "9 Edo. de México NaN NaN NaN ... 19.316000 \n", - "10 San luis Potosí 0.0 3.0 1.0 ... 22.195878 \n", - "11 Nuevo León NaN 3.0 2.0 ... NaN \n", - "12 Querétaro 0.0 3.0 2.0 ... 20.682878 \n", - "13 Distrito Federal 20.0 NaN 0.0 ... 19.374895 \n", - "14 Jalisco NaN NaN 2.0 ... 20.729601 \n", - "15 Edo. de México NaN 3.0 2.0 ... 19.516583 \n", - "16 Querétaro 0.0 3.0 2.0 ... 20.708760 \n", - "17 Distrito Federal 5.0 3.0 2.0 ... NaN \n", - "18 Querétaro NaN NaN 0.0 ... NaN \n", - "19 Querétaro 0.0 3.0 2.0 ... NaN \n", - "20 Nuevo León 0.0 NaN 0.0 ... NaN \n", - "21 Querétaro NaN 3.0 2.0 ... NaN \n", - "22 Tamaulipas 0.0 4.0 NaN ... 22.264768 \n", - "23 Quintana Roo 4.0 3.0 2.0 ... 21.113768 \n", - "24 Querétaro 4.0 3.0 NaN ... 20.559744 \n", - "25 Nuevo León 8.0 3.0 2.0 ... NaN \n", - "26 Puebla 3.0 2.0 1.0 ... NaN \n", - "27 Distrito Federal 18.0 3.0 3.0 ... NaN \n", - "28 Jalisco 1.0 3.0 2.0 ... 20.714140 \n", - "29 Jalisco 20.0 NaN 0.0 ... 20.784515 \n", + " provincia antiguedad habitaciones garages ... lng \\\n", + "0 Distrito Federal NaN 2.0 1.0 ... NaN \n", + "1 Distrito Federal 10.0 3.0 2.0 ... -99.227655 \n", + "2 Jalisco 5.0 3.0 2.0 ... NaN \n", + "3 Edo. de México 1.0 2.0 1.0 ... -99.688015 \n", + "4 Jalisco 10.0 2.0 1.0 ... NaN \n", + "5 Distrito Federal 5.0 2.0 1.0 ... -99.148475 \n", + "6 Oaxaca NaN 3.0 1.0 ... 
-96.803504 \n", + "7 Quintana Roo 2.0 4.0 2.0 ... -87.037968 \n", + "8 Colima 1.0 2.0 1.0 ... NaN \n", + "9 Edo. de México NaN NaN NaN ... -98.887000 \n", + "10 San luis Potosí 0.0 3.0 1.0 ... -101.018956 \n", + "11 Nuevo León NaN 3.0 2.0 ... NaN \n", + "12 Querétaro 0.0 3.0 2.0 ... -100.317503 \n", + "13 Distrito Federal 20.0 NaN 0.0 ... -99.149519 \n", + "14 Jalisco NaN NaN 2.0 ... -103.431993 \n", + "15 Edo. de México NaN 3.0 2.0 ... -99.256028 \n", + "16 Querétaro 0.0 3.0 2.0 ... -100.459303 \n", + "17 Distrito Federal 5.0 3.0 2.0 ... NaN \n", + "18 Querétaro NaN NaN 0.0 ... NaN \n", + "19 Querétaro 0.0 3.0 2.0 ... NaN \n", + "20 Nuevo León 0.0 NaN 0.0 ... NaN \n", + "21 Querétaro NaN 3.0 2.0 ... NaN \n", + "22 Tamaulipas 0.0 4.0 NaN ... -97.841012 \n", + "23 Quintana Roo 4.0 3.0 2.0 ... -86.846323 \n", + "24 Querétaro 4.0 3.0 NaN ... -100.367382 \n", + "25 Nuevo León 8.0 3.0 2.0 ... NaN \n", + "26 Puebla 3.0 2.0 1.0 ... NaN \n", + "27 Distrito Federal 18.0 3.0 3.0 ... NaN \n", + "28 Jalisco 1.0 3.0 2.0 ... -103.453808 \n", + "29 Jalisco 20.0 NaN 0.0 ... 
-103.477062 \n", "\n", - " lng fecha gimnasio usosmultiples piscina \\\n", - "0 NaN 2015-08-23 00:00:00 0.0 0.0 0.0 \n", - "1 -99.227655 2013-06-28 00:00:00 0.0 0.0 0.0 \n", - "2 NaN 2015-10-17 00:00:00 0.0 0.0 0.0 \n", - "3 -99.688015 2012-03-09 00:00:00 0.0 0.0 0.0 \n", - "4 NaN 2016-06-07 00:00:00 0.0 0.0 0.0 \n", - "5 -99.148475 2014-03-18 00:00:00 0.0 0.0 0.0 \n", - "6 -96.803504 2016-02-23 00:00:00 0.0 0.0 0.0 \n", - "7 -87.037968 2016-10-20 00:00:00 0.0 0.0 0.0 \n", - "8 NaN 2014-01-06 00:00:00 0.0 0.0 0.0 \n", - "9 -98.887000 2016-12-22 00:00:00 0.0 0.0 0.0 \n", - "10 -101.018956 2013-01-03 00:00:00 0.0 0.0 0.0 \n", - "11 NaN 2014-12-01 00:00:00 0.0 0.0 0.0 \n", - "12 -100.317503 2013-08-12 00:00:00 0.0 1.0 0.0 \n", - "13 -99.149519 2015-03-31 00:00:00 0.0 0.0 0.0 \n", - "14 -103.431993 2014-11-25 00:00:00 0.0 0.0 0.0 \n", - "15 -99.256028 2015-01-13 00:00:00 0.0 0.0 0.0 \n", - "16 -100.459303 2013-03-06 00:00:00 0.0 0.0 0.0 \n", - "17 NaN 2016-10-30 00:00:00 0.0 0.0 0.0 \n", - "18 NaN 2016-12-03 00:00:00 0.0 0.0 1.0 \n", - "19 NaN 2016-01-15 00:00:00 0.0 0.0 0.0 \n", - "20 NaN 2013-12-02 00:00:00 0.0 0.0 0.0 \n", - "21 NaN 2016-02-01 00:00:00 0.0 0.0 0.0 \n", - "22 -97.841012 2013-12-12 00:00:00 0.0 0.0 0.0 \n", - "23 -86.846323 2015-05-19 00:00:00 0.0 0.0 0.0 \n", - "24 -100.367382 2013-09-26 00:00:00 0.0 0.0 0.0 \n", - "25 NaN 2016-12-31 00:00:00 0.0 0.0 0.0 \n", - "26 NaN 2014-07-22 00:00:00 0.0 0.0 0.0 \n", - "27 NaN 2012-03-07 00:00:00 0.0 0.0 0.0 \n", - "28 -103.453808 2015-01-20 00:00:00 0.0 0.0 0.0 \n", - "29 -103.477062 2016-12-03 00:00:00 0.0 0.0 0.0 \n", + " fecha gimnasio usosmultiples piscina escuelascercanas \\\n", + "0 2015-08-23 00:00:00 0.0 0.0 0.0 0.0 \n", + "1 2013-06-28 00:00:00 0.0 0.0 0.0 1.0 \n", + "2 2015-10-17 00:00:00 0.0 0.0 0.0 0.0 \n", + "3 2012-03-09 00:00:00 0.0 0.0 0.0 1.0 \n", + "4 2016-06-07 00:00:00 0.0 0.0 0.0 0.0 \n", + "5 2014-03-18 00:00:00 0.0 0.0 0.0 0.0 \n", + "6 2016-02-23 00:00:00 0.0 0.0 0.0 0.0 \n", + "7 
2016-10-20 00:00:00 0.0 0.0 0.0 0.0 \n", + "8 2014-01-06 00:00:00 0.0 0.0 0.0 1.0 \n", + "9 2016-12-22 00:00:00 0.0 0.0 0.0 0.0 \n", + "10 2013-01-03 00:00:00 0.0 0.0 0.0 1.0 \n", + "11 2014-12-01 00:00:00 0.0 0.0 0.0 0.0 \n", + "12 2013-08-12 00:00:00 0.0 1.0 0.0 1.0 \n", + "13 2015-03-31 00:00:00 0.0 0.0 0.0 0.0 \n", + "14 2014-11-25 00:00:00 0.0 0.0 0.0 0.0 \n", + "15 2015-01-13 00:00:00 0.0 0.0 0.0 0.0 \n", + "16 2013-03-06 00:00:00 0.0 0.0 0.0 1.0 \n", + "17 2016-10-30 00:00:00 0.0 0.0 0.0 1.0 \n", + "18 2016-12-03 00:00:00 0.0 0.0 1.0 1.0 \n", + "19 2016-01-15 00:00:00 0.0 0.0 0.0 0.0 \n", + "20 2013-12-02 00:00:00 0.0 0.0 0.0 0.0 \n", + "21 2016-02-01 00:00:00 0.0 0.0 0.0 0.0 \n", + "22 2013-12-12 00:00:00 0.0 0.0 0.0 0.0 \n", + "23 2015-05-19 00:00:00 0.0 0.0 0.0 0.0 \n", + "24 2013-09-26 00:00:00 0.0 0.0 0.0 1.0 \n", + "25 2016-12-31 00:00:00 0.0 0.0 0.0 1.0 \n", + "26 2014-07-22 00:00:00 0.0 0.0 0.0 1.0 \n", + "27 2012-03-07 00:00:00 0.0 0.0 0.0 0.0 \n", + "28 2015-01-20 00:00:00 0.0 0.0 0.0 0.0 \n", + "29 2016-12-03 00:00:00 0.0 0.0 0.0 1.0 \n", "\n", - " escuelascercanas centroscomercialescercanos precio \\\n", - "0 0.0 0.0 2273000.0 \n", - "1 1.0 1.0 3600000.0 \n", - "2 0.0 0.0 1200000.0 \n", - "3 1.0 1.0 650000.0 \n", - "4 0.0 0.0 1150000.0 \n", - "5 0.0 1.0 1100000.0 \n", - "6 0.0 0.0 1150000.0 \n", - "7 0.0 0.0 4200000.0 \n", - "8 1.0 1.0 310000.0 \n", - "9 0.0 0.0 6200000.0 \n", - "10 1.0 1.0 488000.0 \n", - "11 0.0 0.0 7200000.0 \n", - "12 1.0 1.0 1500000.0 \n", - "13 0.0 0.0 3000000.0 \n", - "14 0.0 0.0 5300000.0 \n", - "15 0.0 0.0 3650000.0 \n", - "16 1.0 1.0 2450000.0 \n", - "17 1.0 1.0 4750000.0 \n", - "18 1.0 1.0 794078.0 \n", - "19 0.0 0.0 1850000.0 \n", - "20 0.0 0.0 490000.0 \n", - "21 0.0 0.0 2300000.0 \n", - "22 0.0 0.0 1340000.0 \n", - "23 0.0 0.0 3500000.0 \n", - "24 1.0 1.0 3900000.0 \n", - "25 1.0 1.0 850000.0 \n", - "26 1.0 1.0 310000.0 \n", - "27 0.0 0.0 10800000.0 \n", - "28 0.0 0.0 2600000.0 \n", - "29 1.0 0.0 10707900.0 \n", + " 
centroscomercialescercanos precio cantidad_tipodepropiedad_provincia \\\n", + "0 0.0 2273000.0 3 \n", + "1 1.0 3600000.0 2 \n", + "2 0.0 1200000.0 2 \n", + "3 1.0 650000.0 2 \n", + "4 0.0 1150000.0 1 \n", + "5 1.0 1100000.0 3 \n", + "6 0.0 1150000.0 1 \n", + "7 0.0 4200000.0 1 \n", + "8 1.0 310000.0 1 \n", + "9 0.0 6200000.0 1 \n", + "10 1.0 488000.0 1 \n", + "11 0.0 7200000.0 1 \n", + "12 1.0 1500000.0 4 \n", + "13 0.0 3000000.0 1 \n", + "14 0.0 5300000.0 2 \n", + "15 0.0 3650000.0 2 \n", + "16 1.0 2450000.0 4 \n", + "17 1.0 4750000.0 3 \n", + "18 1.0 794078.0 1 \n", + "19 0.0 1850000.0 4 \n", + "20 0.0 490000.0 1 \n", + "21 0.0 2300000.0 1 \n", + "22 0.0 1340000.0 1 \n", + "23 0.0 3500000.0 1 \n", + "24 1.0 3900000.0 4 \n", + "25 1.0 850000.0 1 \n", + "26 1.0 310000.0 1 \n", + "27 0.0 10800000.0 2 \n", + "28 0.0 2600000.0 1 \n", + "29 0.0 10707900.0 1 \n", "\n", - " cantidad_tipodepropiedad_provincia \n", - "0 3 \n", - "1 2 \n", - "2 2 \n", - "3 2 \n", - "4 1 \n", - "5 3 \n", - "6 1 \n", - "7 1 \n", - "8 1 \n", - "9 1 \n", - "10 1 \n", - "11 1 \n", - "12 4 \n", - "13 1 \n", - "14 2 \n", - "15 2 \n", - "16 4 \n", - "17 3 \n", - "18 1 \n", - "19 4 \n", - "20 1 \n", - "21 1 \n", - "22 1 \n", - "23 1 \n", - "24 4 \n", - "25 1 \n", - "26 1 \n", - "27 2 \n", - "28 1 \n", - "29 1 \n", + " cantidad_propiedades_en_provincia \n", + "0 6 \n", + "1 6 \n", + "2 5 \n", + "3 3 \n", + "4 5 \n", + "5 6 \n", + "6 1 \n", + "7 2 \n", + "8 1 \n", + "9 3 \n", + "10 1 \n", + "11 3 \n", + "12 6 \n", + "13 6 \n", + "14 5 \n", + "15 3 \n", + "16 6 \n", + "17 6 \n", + "18 6 \n", + "19 6 \n", + "20 3 \n", + "21 6 \n", + "22 1 \n", + "23 2 \n", + "24 6 \n", + "25 3 \n", + "26 1 \n", + "27 6 \n", + "28 5 \n", + "29 5 \n", "\n", - "[30 rows x 24 columns]" + "[30 rows x 25 columns]" ] }, - "execution_count": 89, + "execution_count": 98, "metadata": {}, "output_type": "execute_result" } @@ -1086,7 +1107,8 @@ "# df = df_train\n", "\n", "# concatenar_categoricos(df, 'tipodepropiedad', 
'provincia')\n", - "cantidad_propiedad_misma_cantidad(df, 'tipodepropiedad', 'provincia')" + "# cantidad_propiedad_misma_cantidad(df, 'tipodepropiedad', 'provincia')\n", + "cantidad_atributo(df, 'provincia')" ] } ], diff --git a/html/features_complementarias.html b/html/features_complementarias.html index 916dd18..404fd93 100644 --- a/html/features_complementarias.html +++ b/html/features_complementarias.html @@ -13147,14 +13147,40 @@
-
In [89]:
+
In [97]:
+
+
+
def cantidad_atributo(df, col):
+    '''Simil a value_counts. Dada una columna, agrega al df, cuantas propiedades existen con esa misma propiedad'''
+    df_aux = df.copy()
+    df_aux = df_aux.groupby(['provincia']).agg({'id': 'count'})
+    df_aux = df_aux.rename(columns={'id':'cantidad'})
+    df_aux = df_aux.reset_index()
+
+    def get_cantidad(col1):
+        cantidad = df_aux[df_aux[col] == col1]['cantidad']
+        return cantidad.values[0] if len(cantidad.values > 0) else 0
+    
+    df['cantidad_propiedades_en_' + col] = df.apply(lambda x: get_cantidad(x[col]), axis=1)
+    return df
+
+ +
+
+
+ +
+
+
+
In [98]:
df = df_train.head(30)
 # df = df_train
 
 # concatenar_categoricos(df, 'tipodepropiedad', 'provincia')
-cantidad_propiedad_misma_cantidad(df, 'tipodepropiedad', 'provincia')
+# cantidad_propiedad_misma_cantidad(df, 'tipodepropiedad', 'provincia')
+cantidad_atributo(df, 'provincia')
 
@@ -13171,19 +13197,19 @@
-
/home/tomas/Facultad/datos/datos-tp2/.venv/lib/python3.6/site-packages/ipykernel_launcher.py:15: SettingWithCopyWarning: 
+
/home/tomas/Facultad/datos/datos-tp2/.venv/lib/python3.6/site-packages/ipykernel_launcher.py:12: SettingWithCopyWarning: 
 A value is trying to be set on a copy of a slice from a DataFrame.
 Try using .loc[row_indexer,col_indexer] = value instead
 
 See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
-  from ipykernel import kernelapp as app
+  if sys.path[0] == '':
 
-
Out[89]:
+
Out[98]:
@@ -13217,7 +13243,6 @@ habitaciones garages ... - lat lng fecha gimnasio @@ -13227,6 +13252,7 @@ centroscomercialescercanos precio cantidad_tipodepropiedad_provincia + cantidad_propiedades_en_provincia @@ -13244,7 +13270,6 @@ 1.0 ... NaN - NaN 2015-08-23 00:00:00 0.0 0.0 @@ -13253,6 +13278,7 @@ 0.0 2273000.0 3 + 6 1 @@ -13267,7 +13293,6 @@ 3.0 2.0 ... - 19.310205 -99.227655 2013-06-28 00:00:00 0.0 @@ -13277,6 +13302,7 @@ 1.0 3600000.0 2 + 6 2 @@ -13292,7 +13318,6 @@ 2.0 ... NaN - NaN 2015-10-17 00:00:00 0.0 0.0 @@ -13301,6 +13326,7 @@ 0.0 1200000.0 2 + 5 3 @@ -13315,7 +13341,6 @@ 2.0 1.0 ... - 19.301890 -99.688015 2012-03-09 00:00:00 0.0 @@ -13325,6 +13350,7 @@ 1.0 650000.0 2 + 3 4 @@ -13340,7 +13366,6 @@ 1.0 ... NaN - NaN 2016-06-07 00:00:00 0.0 0.0 @@ -13349,6 +13374,7 @@ 0.0 1150000.0 1 + 5 5 @@ -13363,7 +13389,6 @@ 2.0 1.0 ... - 19.300612 -99.148475 2014-03-18 00:00:00 0.0 @@ -13373,6 +13398,7 @@ 1.0 1100000.0 3 + 6 6 @@ -13387,7 +13413,6 @@ 3.0 1.0 ... - 17.143504 -96.803504 2016-02-23 00:00:00 0.0 @@ -13397,6 +13422,7 @@ 0.0 1150000.0 1 + 1 7 @@ -13411,7 +13437,6 @@ 4.0 2.0 ... - 20.672612 -87.037968 2016-10-20 00:00:00 0.0 @@ -13421,6 +13446,7 @@ 0.0 4200000.0 1 + 2 8 @@ -13436,7 +13462,6 @@ 1.0 ... NaN - NaN 2014-01-06 00:00:00 0.0 0.0 @@ -13445,6 +13470,7 @@ 1.0 310000.0 1 + 1 9 @@ -13459,7 +13485,6 @@ NaN NaN ... - 19.316000 -98.887000 2016-12-22 00:00:00 0.0 @@ -13469,6 +13494,7 @@ 0.0 6200000.0 1 + 3 10 @@ -13483,7 +13509,6 @@ 3.0 1.0 ... - 22.195878 -101.018956 2013-01-03 00:00:00 0.0 @@ -13493,6 +13518,7 @@ 1.0 488000.0 1 + 1 11 @@ -13508,7 +13534,6 @@ 2.0 ... NaN - NaN 2014-12-01 00:00:00 0.0 0.0 @@ -13517,6 +13542,7 @@ 0.0 7200000.0 1 + 3 12 @@ -13531,7 +13557,6 @@ 3.0 2.0 ... - 20.682878 -100.317503 2013-08-12 00:00:00 0.0 @@ -13541,6 +13566,7 @@ 1.0 1500000.0 4 + 6 13 @@ -13555,7 +13581,6 @@ NaN 0.0 ... - 19.374895 -99.149519 2015-03-31 00:00:00 0.0 @@ -13565,6 +13590,7 @@ 0.0 3000000.0 1 + 6 14 @@ -13579,7 +13605,6 @@ NaN 2.0 ... 
- 20.729601 -103.431993 2014-11-25 00:00:00 0.0 @@ -13589,6 +13614,7 @@ 0.0 5300000.0 2 + 5 15 @@ -13603,7 +13629,6 @@ 3.0 2.0 ... - 19.516583 -99.256028 2015-01-13 00:00:00 0.0 @@ -13613,6 +13638,7 @@ 0.0 3650000.0 2 + 3 16 @@ -13627,7 +13653,6 @@ 3.0 2.0 ... - 20.708760 -100.459303 2013-03-06 00:00:00 0.0 @@ -13637,6 +13662,7 @@ 1.0 2450000.0 4 + 6 17 @@ -13652,7 +13678,6 @@ 2.0 ... NaN - NaN 2016-10-30 00:00:00 0.0 0.0 @@ -13661,6 +13686,7 @@ 1.0 4750000.0 3 + 6 18 @@ -13676,7 +13702,6 @@ 0.0 ... NaN - NaN 2016-12-03 00:00:00 0.0 0.0 @@ -13685,6 +13710,7 @@ 1.0 794078.0 1 + 6 19 @@ -13700,7 +13726,6 @@ 2.0 ... NaN - NaN 2016-01-15 00:00:00 0.0 0.0 @@ -13709,6 +13734,7 @@ 0.0 1850000.0 4 + 6 20 @@ -13724,7 +13750,6 @@ 0.0 ... NaN - NaN 2013-12-02 00:00:00 0.0 0.0 @@ -13733,6 +13758,7 @@ 0.0 490000.0 1 + 3 21 @@ -13748,7 +13774,6 @@ 2.0 ... NaN - NaN 2016-02-01 00:00:00 0.0 0.0 @@ -13757,6 +13782,7 @@ 0.0 2300000.0 1 + 6 22 @@ -13771,7 +13797,6 @@ 4.0 NaN ... - 22.264768 -97.841012 2013-12-12 00:00:00 0.0 @@ -13781,6 +13806,7 @@ 0.0 1340000.0 1 + 1 23 @@ -13795,7 +13821,6 @@ 3.0 2.0 ... - 21.113768 -86.846323 2015-05-19 00:00:00 0.0 @@ -13805,6 +13830,7 @@ 0.0 3500000.0 1 + 2 24 @@ -13819,7 +13845,6 @@ 3.0 NaN ... - 20.559744 -100.367382 2013-09-26 00:00:00 0.0 @@ -13829,6 +13854,7 @@ 1.0 3900000.0 4 + 6 25 @@ -13844,7 +13870,6 @@ 2.0 ... NaN - NaN 2016-12-31 00:00:00 0.0 0.0 @@ -13853,6 +13878,7 @@ 1.0 850000.0 1 + 3 26 @@ -13868,7 +13894,6 @@ 1.0 ... NaN - NaN 2014-07-22 00:00:00 0.0 0.0 @@ -13877,6 +13902,7 @@ 1.0 310000.0 1 + 1 27 @@ -13892,7 +13918,6 @@ 3.0 ... NaN - NaN 2012-03-07 00:00:00 0.0 0.0 @@ -13901,6 +13926,7 @@ 0.0 10800000.0 2 + 6 28 @@ -13915,7 +13941,6 @@ 3.0 2.0 ... - 20.714140 -103.453808 2015-01-20 00:00:00 0.0 @@ -13925,6 +13950,7 @@ 0.0 2600000.0 1 + 5 29 @@ -13939,7 +13965,6 @@ NaN 0.0 ... - 20.784515 -103.477062 2016-12-03 00:00:00 0.0 @@ -13949,10 +13974,11 @@ 0.0 10707900.0 1 + 5 -

30 rows × 24 columns

+

30 rows × 25 columns

From 24b7ebc8be91dc341e8181de97124ad42bbb05b8 Mon Sep 17 00:00:00 2001 From: tlofano Date: Fri, 22 Nov 2019 22:30:01 -0300 Subject: [PATCH 5/8] ranking por atributos --- features_complementarias.ipynb | 181 ++++++++++++++++++----------- html/features_complementarias.html | 123 ++++++++++++++------ 2 files changed, 197 insertions(+), 107 deletions(-) diff --git a/features_complementarias.ipynb b/features_complementarias.ipynb index 82fb898..3c4f175 100644 --- a/features_complementarias.ipynb +++ b/features_complementarias.ipynb @@ -61,14 +61,14 @@ }, { "cell_type": "code", - "execution_count": 97, + "execution_count": 107, "metadata": {}, "outputs": [], "source": [ "def cantidad_atributo(df, col):\n", " '''Simil a value_counts. Dada una columna, agrega al df, cuantas propiedades existen con esa misma propiedad'''\n", " df_aux = df.copy()\n", - " df_aux = df_aux.groupby(['provincia']).agg({'id': 'count'})\n", + " df_aux = df_aux.groupby([col]).agg({'id': 'count'})\n", " df_aux = df_aux.rename(columns={'id':'cantidad'})\n", " df_aux = df_aux.reset_index()\n", "\n", @@ -82,7 +82,44 @@ }, { "cell_type": "code", - "execution_count": 98, + "execution_count": 151, + "metadata": {}, + "outputs": [], + "source": [ + "def ranking_atributo(df, col):\n", + " '''Cuenta cuantas propiedades hay con los atributos de la columna recibida. Y para cada propiedad,\n", + " se fija en que posicion esta del ranking. 
Por ejemplo, si hay dos propuedades, una con dos banos y otra \n", + " con uno solo, el rankin de las que tengan dos banos en 1, y de las propiedades que tengan un solo bano es \n", + " dos'''\n", + " \n", + " '''Rellena los nulos con \"otros\" '''\n", + " df[col] = df[col].fillna('otros')\n", + " \n", + " df_aux = df.copy()\n", + " df_aux = df_aux.groupby([col]).agg({'id': 'count'})\n", + " df_aux = df_aux.rename(columns={'id':'cantidad'})\n", + " df_aux = df_aux.reset_index()\n", + " \n", + " df_aux = df_aux.nlargest(df_aux.shape[0], 'cantidad') \n", + " lista_ranking = df_aux[col].tolist()\n", + " rank = {}\n", + " contador = 1\n", + " \n", + " for item in lista_ranking:\n", + " rank[item] = contador\n", + " contador = contador + 1\n", + "\n", + " def get_ranking(col1):\n", + " item = df_aux[df_aux[col] == col1][col].values[0]\n", + " return rank[item]\n", + " \n", + " df['ranking_en_' + col] = df.apply(lambda x: get_ranking(x[col]), axis=1)\n", + " return df" + ] + }, + { + "cell_type": "code", + "execution_count": 153, "metadata": { "scrolled": false }, @@ -91,12 +128,17 @@ "name": "stderr", "output_type": "stream", "text": [ - "/home/tomas/Facultad/datos/datos-tp2/.venv/lib/python3.6/site-packages/ipykernel_launcher.py:12: SettingWithCopyWarning: \n", + "/home/tomas/Facultad/datos/datos-tp2/.venv/lib/python3.6/site-packages/ipykernel_launcher.py:8: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", - " if sys.path[0] == '':\n" + " \n", + "/home/tomas/Facultad/datos/datos-tp2/.venv/lib/python3.6/site-packages/ipykernel_launcher.py:28: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", 
+ "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n" ] }, { @@ -140,7 +182,7 @@ " centroscomercialescercanos\n", " precio\n", " cantidad_tipodepropiedad_provincia\n", - " cantidad_propiedades_en_provincia\n", + " ranking_en_provincia\n", " \n", " \n", " \n", @@ -166,7 +208,7 @@ " 0.0\n", " 2273000.0\n", " 3\n", - " 6\n", + " 1\n", " \n", " \n", " 1\n", @@ -190,7 +232,7 @@ " 1.0\n", " 3600000.0\n", " 2\n", - " 6\n", + " 1\n", " \n", " \n", " 2\n", @@ -214,7 +256,7 @@ " 0.0\n", " 1200000.0\n", " 2\n", - " 5\n", + " 3\n", " \n", " \n", " 3\n", @@ -238,7 +280,7 @@ " 1.0\n", " 650000.0\n", " 2\n", - " 3\n", + " 4\n", " \n", " \n", " 4\n", @@ -262,7 +304,7 @@ " 0.0\n", " 1150000.0\n", " 1\n", - " 5\n", + " 3\n", " \n", " \n", " 5\n", @@ -286,7 +328,7 @@ " 1.0\n", " 1100000.0\n", " 3\n", - " 6\n", + " 1\n", " \n", " \n", " 6\n", @@ -310,7 +352,7 @@ " 0.0\n", " 1150000.0\n", " 1\n", - " 1\n", + " 8\n", " \n", " \n", " 7\n", @@ -334,7 +376,7 @@ " 0.0\n", " 4200000.0\n", " 1\n", - " 2\n", + " 6\n", " \n", " \n", " 8\n", @@ -358,7 +400,7 @@ " 1.0\n", " 310000.0\n", " 1\n", - " 1\n", + " 7\n", " \n", " \n", " 9\n", @@ -382,7 +424,7 @@ " 0.0\n", " 6200000.0\n", " 1\n", - " 3\n", + " 4\n", " \n", " \n", " 10\n", @@ -406,7 +448,7 @@ " 1.0\n", " 488000.0\n", " 1\n", - " 1\n", + " 10\n", " \n", " \n", " 11\n", @@ -430,7 +472,7 @@ " 0.0\n", " 7200000.0\n", " 1\n", - " 3\n", + " 5\n", " \n", " \n", " 12\n", @@ -454,7 +496,7 @@ " 1.0\n", " 1500000.0\n", " 4\n", - " 6\n", + " 2\n", " \n", " \n", " 13\n", @@ -478,7 +520,7 @@ " 0.0\n", " 3000000.0\n", " 1\n", - " 6\n", + " 1\n", " \n", " \n", " 14\n", @@ -502,7 +544,7 @@ " 0.0\n", " 5300000.0\n", " 2\n", - " 5\n", + " 3\n", " \n", " \n", " 15\n", @@ -526,7 +568,7 @@ " 0.0\n", " 3650000.0\n", " 2\n", - " 3\n", + " 4\n", " \n", " \n", " 16\n", @@ -550,7 +592,7 @@ " 1.0\n", " 2450000.0\n", " 4\n", - " 6\n", + " 2\n", " \n", " \n", " 17\n", @@ -574,7 
+616,7 @@ " 1.0\n", " 4750000.0\n", " 3\n", - " 6\n", + " 1\n", " \n", " \n", " 18\n", @@ -598,7 +640,7 @@ " 1.0\n", " 794078.0\n", " 1\n", - " 6\n", + " 2\n", " \n", " \n", " 19\n", @@ -622,7 +664,7 @@ " 0.0\n", " 1850000.0\n", " 4\n", - " 6\n", + " 2\n", " \n", " \n", " 20\n", @@ -646,7 +688,7 @@ " 0.0\n", " 490000.0\n", " 1\n", - " 3\n", + " 5\n", " \n", " \n", " 21\n", @@ -670,7 +712,7 @@ " 0.0\n", " 2300000.0\n", " 1\n", - " 6\n", + " 2\n", " \n", " \n", " 22\n", @@ -694,7 +736,7 @@ " 0.0\n", " 1340000.0\n", " 1\n", - " 1\n", + " 11\n", " \n", " \n", " 23\n", @@ -718,7 +760,7 @@ " 0.0\n", " 3500000.0\n", " 1\n", - " 2\n", + " 6\n", " \n", " \n", " 24\n", @@ -742,7 +784,7 @@ " 1.0\n", " 3900000.0\n", " 4\n", - " 6\n", + " 2\n", " \n", " \n", " 25\n", @@ -766,7 +808,7 @@ " 1.0\n", " 850000.0\n", " 1\n", - " 3\n", + " 5\n", " \n", " \n", " 26\n", @@ -790,7 +832,7 @@ " 1.0\n", " 310000.0\n", " 1\n", - " 1\n", + " 9\n", " \n", " \n", " 27\n", @@ -814,7 +856,7 @@ " 0.0\n", " 10800000.0\n", " 2\n", - " 6\n", + " 1\n", " \n", " \n", " 28\n", @@ -838,7 +880,7 @@ " 0.0\n", " 2600000.0\n", " 1\n", - " 5\n", + " 3\n", " \n", " \n", " 29\n", @@ -862,7 +904,7 @@ " 0.0\n", " 10707900.0\n", " 1\n", - " 5\n", + " 3\n", " \n", " \n", "\n", @@ -1062,42 +1104,42 @@ "28 0.0 2600000.0 1 \n", "29 0.0 10707900.0 1 \n", "\n", - " cantidad_propiedades_en_provincia \n", - "0 6 \n", - "1 6 \n", - "2 5 \n", - "3 3 \n", - "4 5 \n", - "5 6 \n", - "6 1 \n", - "7 2 \n", - "8 1 \n", - "9 3 \n", - "10 1 \n", - "11 3 \n", - "12 6 \n", - "13 6 \n", - "14 5 \n", - "15 3 \n", - "16 6 \n", - "17 6 \n", - "18 6 \n", - "19 6 \n", - "20 3 \n", - "21 6 \n", - "22 1 \n", - "23 2 \n", - "24 6 \n", - "25 3 \n", - "26 1 \n", - "27 6 \n", - "28 5 \n", - "29 5 \n", + " ranking_en_provincia \n", + "0 1 \n", + "1 1 \n", + "2 3 \n", + "3 4 \n", + "4 3 \n", + "5 1 \n", + "6 8 \n", + "7 6 \n", + "8 7 \n", + "9 4 \n", + "10 10 \n", + "11 5 \n", + "12 2 \n", + "13 1 \n", + "14 3 \n", + "15 4 \n", + "16 2 \n", + "17 
1 \n", + "18 2 \n", + "19 2 \n", + "20 5 \n", + "21 2 \n", + "22 11 \n", + "23 6 \n", + "24 2 \n", + "25 5 \n", + "26 9 \n", + "27 1 \n", + "28 3 \n", + "29 3 \n", "\n", "[30 rows x 25 columns]" ] }, - "execution_count": 98, + "execution_count": 153, "metadata": {}, "output_type": "execute_result" } @@ -1108,7 +1150,8 @@ "\n", "# concatenar_categoricos(df, 'tipodepropiedad', 'provincia')\n", "# cantidad_propiedad_misma_cantidad(df, 'tipodepropiedad', 'provincia')\n", - "cantidad_atributo(df, 'provincia')" + "# cantidad_atributo(df, 'provincia')\n", + "ranking_atributo(df, 'provincia')" ] } ], diff --git a/html/features_complementarias.html b/html/features_complementarias.html index 404fd93..54c4d29 100644 --- a/html/features_complementarias.html +++ b/html/features_complementarias.html @@ -13147,13 +13147,13 @@
-
In [97]:
+
In [107]:
def cantidad_atributo(df, col):
     '''Simil a value_counts. Dada una columna, agrega al df, cuantas propiedades existen con esa misma propiedad'''
     df_aux = df.copy()
-    df_aux = df_aux.groupby(['provincia']).agg({'id': 'count'})
+    df_aux = df_aux.groupby([col]).agg({'id': 'count'})
     df_aux = df_aux.rename(columns={'id':'cantidad'})
     df_aux = df_aux.reset_index()
 
@@ -13172,7 +13172,48 @@
 
-
In [98]:
+
In [151]:
+
+
+
def ranking_atributo(df, col):
+    '''Cuenta cuantas propiedades hay con los atributos de la columna recibida. Y para cada propiedad,
+    se fija en que posicion esta del ranking. Por ejemplo, si hay dos propuedades, una con dos banos y otra 
+    con uno solo, el rankin de las que tengan dos banos en 1, y de las propiedades que tengan un solo bano es 
+    dos'''
+    
+    '''Rellena los nulos con "otros" '''
+    df[col] = df[col].fillna('otros')
+    
+    df_aux = df.copy()
+    df_aux = df_aux.groupby([col]).agg({'id': 'count'})
+    df_aux = df_aux.rename(columns={'id':'cantidad'})
+    df_aux = df_aux.reset_index()
+    
+    df_aux = df_aux.nlargest(df_aux.shape[0], 'cantidad') 
+    lista_ranking = df_aux[col].tolist()
+    rank = {}
+    contador = 1
+    
+    for item in lista_ranking:
+        rank[item] = contador
+        contador = contador + 1
+
+    def get_ranking(col1):
+        item = df_aux[df_aux[col] == col1][col].values[0]
+        return rank[item]
+    
+    df['ranking_en_' + col] = df.apply(lambda x: get_ranking(x[col]), axis=1)
+    return df
+
+ +
+
+
+ +
+
+
+
In [153]:
df = df_train.head(30)
@@ -13180,7 +13221,8 @@
 
 # concatenar_categoricos(df, 'tipodepropiedad', 'provincia')
 # cantidad_propiedad_misma_cantidad(df, 'tipodepropiedad', 'provincia')
-cantidad_atributo(df, 'provincia')
+# cantidad_atributo(df, 'provincia')
+ranking_atributo(df, 'provincia')
 
@@ -13197,19 +13239,24 @@
-
/home/tomas/Facultad/datos/datos-tp2/.venv/lib/python3.6/site-packages/ipykernel_launcher.py:12: SettingWithCopyWarning: 
+
/home/tomas/Facultad/datos/datos-tp2/.venv/lib/python3.6/site-packages/ipykernel_launcher.py:8: SettingWithCopyWarning: 
+A value is trying to be set on a copy of a slice from a DataFrame.
+Try using .loc[row_indexer,col_indexer] = value instead
+
+See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
+  
+/home/tomas/Facultad/datos/datos-tp2/.venv/lib/python3.6/site-packages/ipykernel_launcher.py:28: SettingWithCopyWarning: 
 A value is trying to be set on a copy of a slice from a DataFrame.
 Try using .loc[row_indexer,col_indexer] = value instead
 
 See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
-  if sys.path[0] == '':
 
-
Out[98]:
+
Out[153]:
@@ -13252,7 +13299,7 @@ centroscomercialescercanos precio cantidad_tipodepropiedad_provincia - cantidad_propiedades_en_provincia + ranking_en_provincia @@ -13278,7 +13325,7 @@ 0.0 2273000.0 3 - 6 + 1 1 @@ -13302,7 +13349,7 @@ 1.0 3600000.0 2 - 6 + 1 2 @@ -13326,7 +13373,7 @@ 0.0 1200000.0 2 - 5 + 3 3 @@ -13350,7 +13397,7 @@ 1.0 650000.0 2 - 3 + 4 4 @@ -13374,7 +13421,7 @@ 0.0 1150000.0 1 - 5 + 3 5 @@ -13398,7 +13445,7 @@ 1.0 1100000.0 3 - 6 + 1 6 @@ -13422,7 +13469,7 @@ 0.0 1150000.0 1 - 1 + 8 7 @@ -13446,7 +13493,7 @@ 0.0 4200000.0 1 - 2 + 6 8 @@ -13470,7 +13517,7 @@ 1.0 310000.0 1 - 1 + 7 9 @@ -13494,7 +13541,7 @@ 0.0 6200000.0 1 - 3 + 4 10 @@ -13518,7 +13565,7 @@ 1.0 488000.0 1 - 1 + 10 11 @@ -13542,7 +13589,7 @@ 0.0 7200000.0 1 - 3 + 5 12 @@ -13566,7 +13613,7 @@ 1.0 1500000.0 4 - 6 + 2 13 @@ -13590,7 +13637,7 @@ 0.0 3000000.0 1 - 6 + 1 14 @@ -13614,7 +13661,7 @@ 0.0 5300000.0 2 - 5 + 3 15 @@ -13638,7 +13685,7 @@ 0.0 3650000.0 2 - 3 + 4 16 @@ -13662,7 +13709,7 @@ 1.0 2450000.0 4 - 6 + 2 17 @@ -13686,7 +13733,7 @@ 1.0 4750000.0 3 - 6 + 1 18 @@ -13710,7 +13757,7 @@ 1.0 794078.0 1 - 6 + 2 19 @@ -13734,7 +13781,7 @@ 0.0 1850000.0 4 - 6 + 2 20 @@ -13758,7 +13805,7 @@ 0.0 490000.0 1 - 3 + 5 21 @@ -13782,7 +13829,7 @@ 0.0 2300000.0 1 - 6 + 2 22 @@ -13806,7 +13853,7 @@ 0.0 1340000.0 1 - 1 + 11 23 @@ -13830,7 +13877,7 @@ 0.0 3500000.0 1 - 2 + 6 24 @@ -13854,7 +13901,7 @@ 1.0 3900000.0 4 - 6 + 2 25 @@ -13878,7 +13925,7 @@ 1.0 850000.0 1 - 3 + 5 26 @@ -13902,7 +13949,7 @@ 1.0 310000.0 1 - 1 + 9 27 @@ -13926,7 +13973,7 @@ 0.0 10800000.0 2 - 6 + 1 28 @@ -13950,7 +13997,7 @@ 0.0 2600000.0 1 - 5 + 3 29 @@ -13974,7 +14021,7 @@ 0.0 10707900.0 1 - 5 + 3 From 6ce9d333c1442481d54508ac3fb2ad7ef92f10a5 Mon Sep 17 00:00:00 2001 From: tlofano Date: Sat, 23 Nov 2019 15:48:58 -0300 Subject: [PATCH 6/8] Funcion generadora --- features_complementarias.ipynb | 1088 ++-------------------------- html/features_complementarias.html | 867 +++------------------- 2 files changed, 162 
insertions(+), 1793 deletions(-) diff --git a/features_complementarias.ipynb b/features_complementarias.ipynb index 3c4f175..d3bec33 100644 --- a/features_complementarias.ipynb +++ b/features_complementarias.ipynb @@ -14,7 +14,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -36,7 +36,7 @@ }, { "cell_type": "code", - "execution_count": 88, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -61,7 +61,7 @@ }, { "cell_type": "code", - "execution_count": 107, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -82,7 +82,7 @@ }, { "cell_type": "code", - "execution_count": 151, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -119,11 +119,25 @@ }, { "cell_type": "code", - "execution_count": 153, + "execution_count": 13, "metadata": { "scrolled": false }, "outputs": [ + { + "data": { + "text/plain": [ + "Index(['id', 'titulo', 'descripcion', 'tipodepropiedad', 'direccion', 'ciudad',\n", + " 'provincia', 'antiguedad', 'habitaciones', 'garages', 'banos',\n", + " 'metroscubiertos', 'metrostotales', 'idzona', 'lat', 'lng', 'fecha',\n", + " 'gimnasio', 'usosmultiples', 'piscina', 'escuelascercanas',\n", + " 'centroscomercialescercanos', 'precio'],\n", + " dtype='object')" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "name": "stderr", "output_type": "stream", @@ -140,1018 +154,74 @@ "\n", "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n" ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idtitulodescripciontipodepropiedaddireccionciudadprovinciaantiguedadhabitacionesgarages...lngfechagimnasiousosmultiplespiscinaescuelascercanascentroscomercialescercanospreciocantidad_tipodepropiedad_provinciaranking_en_provincia
0254099depto. tipo a-402depto. interior de 80.15m2, consta de sala com...ApartamentoAvenida Division del Norte 2005Benito JuárezDistrito FederalNaN2.01.0...NaN2015-08-23 00:00:000.00.00.00.00.02273000.031
153461condominio horizontal en venta<p>entre sonora y guerrero, atr&aacute;s del h...Casa en condominioAV. MEXICOLa Magdalena ContrerasDistrito Federal10.03.02.0...-99.2276552013-06-28 00:00:000.00.00.01.01.03600000.021
2247984casa en venta urbi 3 recamaras tonaladescripcion \\nla mejor ubicacion residencial e...CasaUrbi TonalaTonaláJalisco5.03.02.0...NaN2015-10-17 00:00:000.00.00.00.00.01200000.023
3209067casa sola en toluca zinacantepec con credito i...casa en privada con caseta de vigilancia casas...CasaIGNACIO MANUEL ALTAMIRANO 128ZinacantepecEdo. de México1.02.01.0...-99.6880152012-03-09 00:00:000.00.00.01.01.0650000.024
4185997paseos del solbonito departamento en excelentes condiciones ...ApartamentoPASEOS DEL SOLZapopanJalisco10.02.01.0...NaN2016-06-07 00:00:000.00.00.00.00.01150000.013
5126147departamento en venta taxqueñaamplio departamento, estancia de sala y comedo...ApartamentoCondominio Tlalpan 2BCoyoacánDistrito Federal5.02.01.0...-99.1484752014-03-18 00:00:000.00.00.00.01.01100000.031
6139233de oportunidad casa en san lorenzoubicada en esquina, pertenece san lorenzo agen...CasaNaNOaxaca de JuárezOaxacaNaN3.01.0...-96.8035042016-02-23 00:00:000.00.00.00.00.01150000.018
75013casa emilia en venta en selvamar playa del carmencasa emilia en venta playa del carmenfracciona...Casacondominio el trebolPlaya del CarmenQuintana Roo2.04.02.0...-87.0379682016-10-20 00:00:000.00.00.00.00.04200000.016
844962pre- venta preciosos depas 2 recamaras con sub...<p>pre-venta de preciosos departamento ecologi...ApartamentoBUENAVISTA DEPTOS CON SUBSIDIOVilla de AlvarezColima1.02.01.0...NaN2014-01-06 00:00:000.00.00.01.01.0310000.017
9134537terrenoterreno de 5.500m2 bardeado, uso de suelo h-20...TerrenoAv. MorelosIxtapalucaEdo. de MéxicoNaNNaNNaN...-98.8870002016-12-22 00:00:000.00.00.00.00.06200000.014
1051180mi 2° credito<p>mi 2&deg; credito<br />para todos aquellos ...CasaMI 2° CREDITOSan Luis PotosíSan luis Potosí0.03.01.0...-101.0189562013-01-03 00:00:000.00.00.01.01.0488000.0110
11103293departamento en venta, san pedro garza garcia,...departamento nuevo ,256 m2 de construccion,un ...ApartamentoNaNSan Pedro Garza GarcíaNuevo LeónNaN3.02.0...NaN2014-12-01 00:00:000.00.00.00.00.07200000.015
1247890hermosa casa en zibata queretaro 170m²<p><strong>casa </strong>en venta en fracciona...CasaPitahayas 2 - 58, ZibataQuerétaroQuerétaro0.03.02.0...-100.3175032013-08-12 00:00:000.01.00.01.01.01500000.042
13130216casa matias romero - sión bancariaflamante casa habitación en condominio horizon...CasaMATIAS ROMERO, COLONIA DEL VALLEBenito JuárezDistrito Federal20.0NaN0.0...-99.1495192015-03-31 00:00:000.00.00.00.00.03000000.011
14181436casa en venta en valle realpre venta de residencia en la toscana con exce...CasaNaNZapopanJaliscoNaNNaN2.0...-103.4319932014-11-25 00:00:000.00.00.00.00.05300000.023
15201923casa en venta lomas verdes naucalpan<p>casa en dos niveles, 3 recámaras la princip...CasaNaNNaucalpan de JuárezEdo. de MéxicoNaN3.02.0...-99.2560282015-01-13 00:00:000.00.00.00.00.03650000.024
16283945preciosa casa en cumbres del lago \\t<p>clave: vcln2450 fecha de actualizaci&oacute...CasaCUMBRES DEL LAGO JURIQUILLA, QUERETARO, MEXICOQuerétaroQuerétaro0.03.02.0...-100.4593032013-03-06 00:00:000.00.00.01.01.02450000.042
1773348oportunidad, departamento col del valle, 3 rec...magnifico departamento con excelente distribuc...ApartamentoPazaje Santa CruzBenito JuárezDistrito Federal5.03.02.0...NaN2016-10-30 00:00:000.00.00.01.01.04750000.031
1840421últimos terrenos en preventa!! en bukara coto ...terreno ubicado en bukara coto club, el cual s...TerrenoMilenioQuerétaroQuerétaroNaNNaN0.0...NaN2016-12-03 00:00:000.00.01.01.01.0794078.012
1987655NaN-- rcv151104-lv-27 -- linda casa con area d...CasaSENDA CELESTIAL 10QuerétaroQuerétaro0.03.02.0...NaN2016-01-15 00:00:000.00.00.00.00.01850000.042
2090071terreno residencial en venta en cumbres quinta...<p>excelente terreno plano, frente a parque en...TerrenoTERRENO RESIDENCIAL EN VENTA EN CUMBRESMonterreyNuevo León0.0NaN0.0...NaN2013-12-02 00:00:000.00.00.00.00.0490000.015
21224513casa en venta en querétaronocnok id: mx15-bk0340. hermosa casa en conjun...Casa en condominioNaNQuerétaroQuerétaroNaN3.02.0...NaN2016-02-01 00:00:000.00.00.00.00.02300000.012
22129818comoda casa con recamara en planta baja y ampl...<p>comoda casa con recamara en planta baja y a...CasaNaNCiudad MaderoTamaulipas0.04.0NaN...-97.8410122013-12-12 00:00:000.00.00.00.00.01340000.0111
23146699casa en condominio en venta, benito juarez, ca...casa en venta en cancun en residencial cumbres...Casa en condominioNaNCancúnQuintana Roo4.03.02.0...-86.8463232015-05-19 00:00:000.00.00.00.00.03500000.016
24235958casa en venta - colinas del cimatario, queréta...<p>propiedad en un nivel, con espacios amplios...CasaColinas del Cimatario C721QuerétaroQuerétaro4.03.0NaN...-100.3673822013-09-26 00:00:000.00.00.01.01.03900000.042
2597918casa venta santa ceciliahermosa casa en santa cecilia en apodaca, cerc...CasaLopez VelardeApodacaNuevo León8.03.02.0...NaN2016-12-31 00:00:000.00.00.01.01.0850000.015
2670294casa en venta de un nivel , frac. misiones de ...¡excelente oportunidad! \\n\\ncasa de un nivel ...Casa en condominioMISIONES DE SAN FRANCISCOPueblaPuebla3.02.01.0...NaN2014-07-22 00:00:000.00.00.01.01.0310000.019
27177031NaNhermosa casa remodelada, con acabados de lujo,...Casa en condominioPASEO DE LOS LAURELESMiguel HidalgoDistrito Federal18.03.03.0...NaN2012-03-07 00:00:000.00.00.00.00.010800000.021
28144635casa en venta en santillana icasa en coto 3 recamaras, estudio, sala-comedo...Casa en condominioCoto Santillana I Casa G11ZapopanJalisco1.03.02.0...-103.4538082015-01-20 00:00:000.00.00.00.00.02600000.013
29146158terreno urbano bardeado zona tesistánen zona de urbanización progresiva, terreno pl...Terreno comercialcamino viejo a TesistanZapopanJalisco20.0NaN0.0...-103.4770622016-12-03 00:00:000.00.00.01.00.010707900.013
\n", - "

30 rows × 25 columns

\n", - "
" - ], - "text/plain": [ - " id titulo \\\n", - "0 254099 depto. tipo a-402 \n", - "1 53461 condominio horizontal en venta \n", - "2 247984 casa en venta urbi 3 recamaras tonala \n", - "3 209067 casa sola en toluca zinacantepec con credito i... \n", - "4 185997 paseos del sol \n", - "5 126147 departamento en venta taxqueña \n", - "6 139233 de oportunidad casa en san lorenzo \n", - "7 5013 casa emilia en venta en selvamar playa del carmen \n", - "8 44962 pre- venta preciosos depas 2 recamaras con sub... \n", - "9 134537 terreno \n", - "10 51180 mi 2° credito \n", - "11 103293 departamento en venta, san pedro garza garcia,... \n", - "12 47890 hermosa casa en zibata queretaro 170m² \n", - "13 130216 casa matias romero - sión bancaria \n", - "14 181436 casa en venta en valle real \n", - "15 201923 casa en venta lomas verdes naucalpan \n", - "16 283945 preciosa casa en cumbres del lago \\t \n", - "17 73348 oportunidad, departamento col del valle, 3 rec... \n", - "18 40421 últimos terrenos en preventa!! en bukara coto ... \n", - "19 87655 NaN \n", - "20 90071 terreno residencial en venta en cumbres quinta... \n", - "21 224513 casa en venta en querétaro \n", - "22 129818 comoda casa con recamara en planta baja y ampl... \n", - "23 146699 casa en condominio en venta, benito juarez, ca... \n", - "24 235958 casa en venta - colinas del cimatario, queréta... \n", - "25 97918 casa venta santa cecilia \n", - "26 70294 casa en venta de un nivel , frac. misiones de ... \n", - "27 177031 NaN \n", - "28 144635 casa en venta en santillana i \n", - "29 146158 terreno urbano bardeado zona tesistán \n", - "\n", - " descripcion tipodepropiedad \\\n", - "0 depto. interior de 80.15m2, consta de sala com... Apartamento \n", - "1

entre sonora y guerrero, atrás del h... Casa en condominio \n", - "2 descripcion \\nla mejor ubicacion residencial e... Casa \n", - "3 casa en privada con caseta de vigilancia casas... Casa \n", - "4 bonito departamento en excelentes condiciones ... Apartamento \n", - "5 amplio departamento, estancia de sala y comedo... Apartamento \n", - "6 ubicada en esquina, pertenece san lorenzo agen... Casa \n", - "7 casa emilia en venta playa del carmenfracciona... Casa \n", - "8

pre-venta de preciosos departamento ecologi... Apartamento \n", - "9 terreno de 5.500m2 bardeado, uso de suelo h-20... Terreno \n", - "10

mi 2° credito
para todos aquellos ... Casa \n", - "11 departamento nuevo ,256 m2 de construccion,un ... Apartamento \n", - "12

casa en venta en fracciona... Casa \n", - "13 flamante casa habitación en condominio horizon... Casa \n", - "14 pre venta de residencia en la toscana con exce... Casa \n", - "15

casa en dos niveles, 3 recámaras la princip... Casa \n", - "16

clave: vcln2450 fecha de actualizació... Casa \n", - "17 magnifico departamento con excelente distribuc... Apartamento \n", - "18 terreno ubicado en bukara coto club, el cual s... Terreno \n", - "19 -- rcv151104-lv-27 -- linda casa con area d... Casa \n", - "20

excelente terreno plano, frente a parque en... Terreno \n", - "21 nocnok id: mx15-bk0340. hermosa casa en conjun... Casa en condominio \n", - "22

comoda casa con recamara en planta baja y a... Casa \n", - "23 casa en venta en cancun en residencial cumbres... Casa en condominio \n", - "24

propiedad en un nivel, con espacios amplios... Casa \n", - "25 hermosa casa en santa cecilia en apodaca, cerc... Casa \n", - "26 ¡excelente oportunidad! \\n\\ncasa de un nivel ... Casa en condominio \n", - "27 hermosa casa remodelada, con acabados de lujo,... Casa en condominio \n", - "28 casa en coto 3 recamaras, estudio, sala-comedo... Casa en condominio \n", - "29 en zona de urbanización progresiva, terreno pl... Terreno comercial \n", - "\n", - " direccion ciudad \\\n", - "0 Avenida Division del Norte 2005 Benito Juárez \n", - "1 AV. MEXICO La Magdalena Contreras \n", - "2 Urbi Tonala Tonalá \n", - "3 IGNACIO MANUEL ALTAMIRANO 128 Zinacantepec \n", - "4 PASEOS DEL SOL Zapopan \n", - "5 Condominio Tlalpan 2B Coyoacán \n", - "6 NaN Oaxaca de Juárez \n", - "7 condominio el trebol Playa del Carmen \n", - "8 BUENAVISTA DEPTOS CON SUBSIDIO Villa de Alvarez \n", - "9 Av. Morelos Ixtapaluca \n", - "10 MI 2° CREDITO San Luis Potosí \n", - "11 NaN San Pedro Garza García \n", - "12 Pitahayas 2 - 58, Zibata Querétaro \n", - "13 MATIAS ROMERO, COLONIA DEL VALLE Benito Juárez \n", - "14 NaN Zapopan \n", - "15 NaN Naucalpan de Juárez \n", - "16 CUMBRES DEL LAGO JURIQUILLA, QUERETARO, MEXICO Querétaro \n", - "17 Pazaje Santa Cruz Benito Juárez \n", - "18 Milenio Querétaro \n", - "19 SENDA CELESTIAL 10 Querétaro \n", - "20 TERRENO RESIDENCIAL EN VENTA EN CUMBRES Monterrey \n", - "21 NaN Querétaro \n", - "22 NaN Ciudad Madero \n", - "23 NaN Cancún \n", - "24 Colinas del Cimatario C721 Querétaro \n", - "25 Lopez Velarde Apodaca \n", - "26 MISIONES DE SAN FRANCISCO Puebla \n", - "27 PASEO DE LOS LAURELES Miguel Hidalgo \n", - "28 Coto Santillana I Casa G11 Zapopan \n", - "29 camino viejo a Tesistan Zapopan \n", - "\n", - " provincia antiguedad habitaciones garages ... lng \\\n", - "0 Distrito Federal NaN 2.0 1.0 ... NaN \n", - "1 Distrito Federal 10.0 3.0 2.0 ... -99.227655 \n", - "2 Jalisco 5.0 3.0 2.0 ... NaN \n", - "3 Edo. de México 1.0 2.0 1.0 ... 
-99.688015 \n", - "4 Jalisco 10.0 2.0 1.0 ... NaN \n", - "5 Distrito Federal 5.0 2.0 1.0 ... -99.148475 \n", - "6 Oaxaca NaN 3.0 1.0 ... -96.803504 \n", - "7 Quintana Roo 2.0 4.0 2.0 ... -87.037968 \n", - "8 Colima 1.0 2.0 1.0 ... NaN \n", - "9 Edo. de México NaN NaN NaN ... -98.887000 \n", - "10 San luis Potosí 0.0 3.0 1.0 ... -101.018956 \n", - "11 Nuevo León NaN 3.0 2.0 ... NaN \n", - "12 Querétaro 0.0 3.0 2.0 ... -100.317503 \n", - "13 Distrito Federal 20.0 NaN 0.0 ... -99.149519 \n", - "14 Jalisco NaN NaN 2.0 ... -103.431993 \n", - "15 Edo. de México NaN 3.0 2.0 ... -99.256028 \n", - "16 Querétaro 0.0 3.0 2.0 ... -100.459303 \n", - "17 Distrito Federal 5.0 3.0 2.0 ... NaN \n", - "18 Querétaro NaN NaN 0.0 ... NaN \n", - "19 Querétaro 0.0 3.0 2.0 ... NaN \n", - "20 Nuevo León 0.0 NaN 0.0 ... NaN \n", - "21 Querétaro NaN 3.0 2.0 ... NaN \n", - "22 Tamaulipas 0.0 4.0 NaN ... -97.841012 \n", - "23 Quintana Roo 4.0 3.0 2.0 ... -86.846323 \n", - "24 Querétaro 4.0 3.0 NaN ... -100.367382 \n", - "25 Nuevo León 8.0 3.0 2.0 ... NaN \n", - "26 Puebla 3.0 2.0 1.0 ... NaN \n", - "27 Distrito Federal 18.0 3.0 3.0 ... NaN \n", - "28 Jalisco 1.0 3.0 2.0 ... -103.453808 \n", - "29 Jalisco 20.0 NaN 0.0 ... 
-103.477062 \n", - "\n", - " fecha gimnasio usosmultiples piscina escuelascercanas \\\n", - "0 2015-08-23 00:00:00 0.0 0.0 0.0 0.0 \n", - "1 2013-06-28 00:00:00 0.0 0.0 0.0 1.0 \n", - "2 2015-10-17 00:00:00 0.0 0.0 0.0 0.0 \n", - "3 2012-03-09 00:00:00 0.0 0.0 0.0 1.0 \n", - "4 2016-06-07 00:00:00 0.0 0.0 0.0 0.0 \n", - "5 2014-03-18 00:00:00 0.0 0.0 0.0 0.0 \n", - "6 2016-02-23 00:00:00 0.0 0.0 0.0 0.0 \n", - "7 2016-10-20 00:00:00 0.0 0.0 0.0 0.0 \n", - "8 2014-01-06 00:00:00 0.0 0.0 0.0 1.0 \n", - "9 2016-12-22 00:00:00 0.0 0.0 0.0 0.0 \n", - "10 2013-01-03 00:00:00 0.0 0.0 0.0 1.0 \n", - "11 2014-12-01 00:00:00 0.0 0.0 0.0 0.0 \n", - "12 2013-08-12 00:00:00 0.0 1.0 0.0 1.0 \n", - "13 2015-03-31 00:00:00 0.0 0.0 0.0 0.0 \n", - "14 2014-11-25 00:00:00 0.0 0.0 0.0 0.0 \n", - "15 2015-01-13 00:00:00 0.0 0.0 0.0 0.0 \n", - "16 2013-03-06 00:00:00 0.0 0.0 0.0 1.0 \n", - "17 2016-10-30 00:00:00 0.0 0.0 0.0 1.0 \n", - "18 2016-12-03 00:00:00 0.0 0.0 1.0 1.0 \n", - "19 2016-01-15 00:00:00 0.0 0.0 0.0 0.0 \n", - "20 2013-12-02 00:00:00 0.0 0.0 0.0 0.0 \n", - "21 2016-02-01 00:00:00 0.0 0.0 0.0 0.0 \n", - "22 2013-12-12 00:00:00 0.0 0.0 0.0 0.0 \n", - "23 2015-05-19 00:00:00 0.0 0.0 0.0 0.0 \n", - "24 2013-09-26 00:00:00 0.0 0.0 0.0 1.0 \n", - "25 2016-12-31 00:00:00 0.0 0.0 0.0 1.0 \n", - "26 2014-07-22 00:00:00 0.0 0.0 0.0 1.0 \n", - "27 2012-03-07 00:00:00 0.0 0.0 0.0 0.0 \n", - "28 2015-01-20 00:00:00 0.0 0.0 0.0 0.0 \n", - "29 2016-12-03 00:00:00 0.0 0.0 0.0 1.0 \n", - "\n", - " centroscomercialescercanos precio cantidad_tipodepropiedad_provincia \\\n", - "0 0.0 2273000.0 3 \n", - "1 1.0 3600000.0 2 \n", - "2 0.0 1200000.0 2 \n", - "3 1.0 650000.0 2 \n", - "4 0.0 1150000.0 1 \n", - "5 1.0 1100000.0 3 \n", - "6 0.0 1150000.0 1 \n", - "7 0.0 4200000.0 1 \n", - "8 1.0 310000.0 1 \n", - "9 0.0 6200000.0 1 \n", - "10 1.0 488000.0 1 \n", - "11 0.0 7200000.0 1 \n", - "12 1.0 1500000.0 4 \n", - "13 0.0 3000000.0 1 \n", - "14 0.0 5300000.0 2 \n", - "15 0.0 3650000.0 2 \n", - 
"16 1.0 2450000.0 4 \n", - "17 1.0 4750000.0 3 \n", - "18 1.0 794078.0 1 \n", - "19 0.0 1850000.0 4 \n", - "20 0.0 490000.0 1 \n", - "21 0.0 2300000.0 1 \n", - "22 0.0 1340000.0 1 \n", - "23 0.0 3500000.0 1 \n", - "24 1.0 3900000.0 4 \n", - "25 1.0 850000.0 1 \n", - "26 1.0 310000.0 1 \n", - "27 0.0 10800000.0 2 \n", - "28 0.0 2600000.0 1 \n", - "29 0.0 10707900.0 1 \n", - "\n", - " ranking_en_provincia \n", - "0 1 \n", - "1 1 \n", - "2 3 \n", - "3 4 \n", - "4 3 \n", - "5 1 \n", - "6 8 \n", - "7 6 \n", - "8 7 \n", - "9 4 \n", - "10 10 \n", - "11 5 \n", - "12 2 \n", - "13 1 \n", - "14 3 \n", - "15 4 \n", - "16 2 \n", - "17 1 \n", - "18 2 \n", - "19 2 \n", - "20 5 \n", - "21 2 \n", - "22 11 \n", - "23 6 \n", - "24 2 \n", - "25 5 \n", - "26 9 \n", - "27 1 \n", - "28 3 \n", - "29 3 \n", - "\n", - "[30 rows x 25 columns]" - ] - }, - "execution_count": 153, - "metadata": {}, - "output_type": "execute_result" } ], "source": [ - "df = df_train.head(30)\n", + "df = df_train.head()\n", "# df = df_train\n", "\n", + "#Ejs:\n", "# concatenar_categoricos(df, 'tipodepropiedad', 'provincia')\n", "# cantidad_propiedad_misma_cantidad(df, 'tipodepropiedad', 'provincia')\n", "# cantidad_atributo(df, 'provincia')\n", - "ranking_atributo(df, 'provincia')" + "# ranking_atributo(df, 'provincia')\n", + "\n", + "def aplicar_features(df):\n", + " df = concatenar_categoricos(df, 'tipodepropiedad', 'provincia')\n", + " df = concatenar_categoricos(df, 'provincia', 'ciudad')\n", + " # -- \n", + " display('Check-1')\n", + " df = cantidad_atributo(df, 'provincia')\n", + " df = cantidad_atributo(df, 'ciudad')\n", + " df = cantidad_atributo(df, 'antiguedad')\n", + " df = cantidad_atributo(df, 'habitaciones')\n", + " df = cantidad_atributo(df, 'garages')\n", + " df = cantidad_atributo(df, 'banos')\n", + " df = cantidad_atributo(df, 'gimnasio')\n", + " df = cantidad_atributo(df, 'usosmultiples')\n", + " df = cantidad_atributo(df, 'escuelascercanas')\n", + " df = cantidad_atributo(df, 
'centroscomercialescercanos')\n", + " df = cantidad_atributo(df, 'piscina')\n", + " # --\n", + " display('Check-2')\n", + " df = ranking_atributo(df, 'provincia')\n", + " df = ranking_atributo(df, 'tipodepropiedad')\n", + " df = ranking_atributo(df, 'ciudad')\n", + " df = ranking_atributo(df, 'antiguedad')\n", + " df = ranking_atributo(df, 'habitaciones')\n", + " df = ranking_atributo(df, 'garages')\n", + " df = ranking_atributo(df, 'banos')\n", + " df = ranking_atributo(df, 'gimasio')\n", + " df = ranking_atributo(df, 'usosmultiples')\n", + " df = ranking_atributo(df, 'escuelascercanas')\n", + " df = ranking_atributo(df, 'centrocomercialescercanos')\n", + " # --\n", + " display('Check-3')\n", + " df = cantidad_propiedad_misma_cantidad(df, 'tipodepropiedad', 'provincia')\n", + " df = cantidad_propiedad_misma_cantidad(df, 'tipodepropiedad', 'ciudad')\n", + " df = cantidad_propiedad_misma_cantidad(df, 'tipodepropiedad', 'antiguedad')\n", + " df = cantidad_propiedad_misma_cantidad(df, 'tipodepropiedad', 'habitaciones')\n", + " df = cantidad_propiedad_misma_cantidad(df, 'tipodepropiedad', 'garages')\n", + " df = cantidad_propiedad_misma_cantidad(df, 'tipodepropiedad', 'banos')\n", + " df = cantidad_propiedad_misma_cantidad(df, 'tipodepropiedad', 'gimnasio')\n", + " df = cantidad_propiedad_misma_cantidad(df, 'tipodepropiedad', 'usosmultiples')\n", + " df = cantidad_propiedad_misma_cantidad(df, 'tipodepropiedad', 'pisicina')\n", + " display('Check-4')\n", + " df = cantidad_propiedad_misma_cantidad(df, 'ciudad', 'antiguedad')\n", + " df = cantidad_propiedad_misma_cantidad(df, 'ciudad', 'habitaciones')\n", + " df = cantidad_propiedad_misma_cantidad(df, 'ciudad', 'garages')\n", + " df = cantidad_propiedad_misma_cantidad(df, 'ciudad', 'banos')\n", + " df = cantidad_propiedad_misma_cantidad(df, 'ciudad', 'gimnasio')\n", + " df = cantidad_propiedad_misma_cantidad(df, 'ciudad', 'usosmultiples')\n", + " df = cantidad_propiedad_misma_cantidad(df, 'ciudad', 'ciudad')\n", + " 
display('Check-5')\n", + " df = cantidad_propiedad_misma_cantidad(df, 'provincia', 'antiguedad')\n", + " df = cantidad_propiedad_misma_cantidad(df, 'provincia', 'habitaciones')\n", + " df = cantidad_propiedad_misma_cantidad(df, 'provincia', 'garages')\n", + " df = cantidad_propiedad_misma_cantidad(df, 'provincia', 'banos')\n", + " df = cantidad_propiedad_misma_cantidad(df, 'provincia', 'gimnasio')\n", + " df = cantidad_propiedad_misma_cantidad(df, 'provincia', 'usosmultiples')\n", + " df = cantidad_propiedad_misma_cantidad(df, 'provincia', 'pisina')" ] } ], diff --git a/html/features_complementarias.html b/html/features_complementarias.html index 54c4d29..5b8e540 100644 --- a/html/features_complementarias.html +++ b/html/features_complementarias.html @@ -13092,7 +13092,7 @@

-
In [2]:
+
In [4]:
def concatenar_categoricos(df, columna1, columna2, N=0, df_aux=None, devolver_cols=False):
@@ -13118,7 +13118,7 @@
 
-
In [88]:
+
In [5]:
def cantidad_propiedad_misma_cantidad(df, agrupar_col, misma_cantidad_col):
@@ -13147,7 +13147,7 @@
 
-
In [107]:
+
In [6]:
def cantidad_atributo(df, col):
@@ -13172,7 +13172,7 @@
 
-
In [151]:
+
In [7]:
def ranking_atributo(df, col):
@@ -13213,16 +13213,74 @@
 
-
In [153]:
+
In [13]:
-
df = df_train.head(30)
+
df = df_train.head()
 # df = df_train
 
+#Ejs:
 # concatenar_categoricos(df, 'tipodepropiedad', 'provincia')
 # cantidad_propiedad_misma_cantidad(df, 'tipodepropiedad', 'provincia')
 # cantidad_atributo(df, 'provincia')
-ranking_atributo(df, 'provincia')
+# ranking_atributo(df, 'provincia')
+
+def aplicar_features(df):
+    df = concatenar_categoricos(df, 'tipodepropiedad', 'provincia')
+    df = concatenar_categoricos(df, 'provincia', 'ciudad')
+    # -- 
+    display('Check-1')
+    df = cantidad_atributo(df, 'provincia')
+    df = cantidad_atributo(df, 'ciudad')
+    df = cantidad_atributo(df, 'antiguedad')
+    df = cantidad_atributo(df, 'habitaciones')
+    df = cantidad_atributo(df, 'garages')
+    df = cantidad_atributo(df, 'banos')
+    df = cantidad_atributo(df, 'gimnasio')
+    df = cantidad_atributo(df, 'usosmultiples')
+    df = cantidad_atributo(df, 'escuelascercanas')
+    df = cantidad_atributo(df, 'centroscomercialescercanos')
+    df = cantidad_atributo(df, 'piscina')
+    # --
+    display('Check-2')
+    df = ranking_atributo(df, 'provincia')
+    df = ranking_atributo(df, 'tipodepropiedad')
+    df = ranking_atributo(df, 'ciudad')
+    df = ranking_atributo(df, 'antiguedad')
+    df = ranking_atributo(df, 'habitaciones')
+    df = ranking_atributo(df, 'garages')
+    df = ranking_atributo(df, 'banos')
+    df = ranking_atributo(df, 'gimasio')
+    df = ranking_atributo(df, 'usosmultiples')
+    df = ranking_atributo(df, 'escuelascercanas')
+    df = ranking_atributo(df, 'centrocomercialescercanos')
+    # --
+    display('Check-3')
+    df = cantidad_propiedad_misma_cantidad(df, 'tipodepropiedad', 'provincia')
+    df = cantidad_propiedad_misma_cantidad(df, 'tipodepropiedad', 'ciudad')
+    df = cantidad_propiedad_misma_cantidad(df, 'tipodepropiedad', 'antiguedad')
+    df = cantidad_propiedad_misma_cantidad(df, 'tipodepropiedad', 'habitaciones')
+    df = cantidad_propiedad_misma_cantidad(df, 'tipodepropiedad', 'garages')
+    df = cantidad_propiedad_misma_cantidad(df, 'tipodepropiedad', 'banos')
+    df = cantidad_propiedad_misma_cantidad(df, 'tipodepropiedad', 'gimnasio')
+    df = cantidad_propiedad_misma_cantidad(df, 'tipodepropiedad', 'usosmultiples')
+    df = cantidad_propiedad_misma_cantidad(df, 'tipodepropiedad', 'pisicina')
+    display('Check-4')
+    df = cantidad_propiedad_misma_cantidad(df, 'ciudad', 'antiguedad')
+    df = cantidad_propiedad_misma_cantidad(df, 'ciudad', 'habitaciones')
+    df = cantidad_propiedad_misma_cantidad(df, 'ciudad', 'garages')
+    df = cantidad_propiedad_misma_cantidad(df, 'ciudad', 'banos')
+    df = cantidad_propiedad_misma_cantidad(df, 'ciudad', 'gimnasio')
+    df = cantidad_propiedad_misma_cantidad(df, 'ciudad', 'usosmultiples')
+    df = cantidad_propiedad_misma_cantidad(df, 'ciudad', 'ciudad')
+    display('Check-5')
+    df = cantidad_propiedad_misma_cantidad(df, 'provincia', 'antiguedad')
+    df = cantidad_propiedad_misma_cantidad(df, 'provincia', 'habitaciones')
+    df = cantidad_propiedad_misma_cantidad(df, 'provincia', 'garages')
+    df = cantidad_propiedad_misma_cantidad(df, 'provincia', 'banos')
+    df = cantidad_propiedad_misma_cantidad(df, 'provincia', 'gimnasio')
+    df = cantidad_propiedad_misma_cantidad(df, 'provincia', 'usosmultiples')
+    df = cantidad_propiedad_misma_cantidad(df, 'provincia', 'pisina')
 
@@ -13238,6 +13296,24 @@
+ + +
+
Index(['id', 'titulo', 'descripcion', 'tipodepropiedad', 'direccion', 'ciudad',
+       'provincia', 'antiguedad', 'habitaciones', 'garages', 'banos',
+       'metroscubiertos', 'metrostotales', 'idzona', 'lat', 'lng', 'fecha',
+       'gimnasio', 'usosmultiples', 'piscina', 'escuelascercanas',
+       'centroscomercialescercanos', 'precio'],
+      dtype='object')
+
+ +
+ +
+ +
+ +
/home/tomas/Facultad/datos/datos-tp2/.venv/lib/python3.6/site-packages/ipykernel_launcher.py:8: SettingWithCopyWarning: 
 A value is trying to be set on a copy of a slice from a DataFrame.
@@ -13254,783 +13330,6 @@
 
-
- -
Out[153]:
- - - -
-
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idtitulodescripciontipodepropiedaddireccionciudadprovinciaantiguedadhabitacionesgarages...lngfechagimnasiousosmultiplespiscinaescuelascercanascentroscomercialescercanospreciocantidad_tipodepropiedad_provinciaranking_en_provincia
0254099depto. tipo a-402depto. interior de 80.15m2, consta de sala com...ApartamentoAvenida Division del Norte 2005Benito JuárezDistrito FederalNaN2.01.0...NaN2015-08-23 00:00:000.00.00.00.00.02273000.031
153461condominio horizontal en venta<p>entre sonora y guerrero, atr&aacute;s del h...Casa en condominioAV. MEXICOLa Magdalena ContrerasDistrito Federal10.03.02.0...-99.2276552013-06-28 00:00:000.00.00.01.01.03600000.021
2247984casa en venta urbi 3 recamaras tonaladescripcion \nla mejor ubicacion residencial e...CasaUrbi TonalaTonaláJalisco5.03.02.0...NaN2015-10-17 00:00:000.00.00.00.00.01200000.023
3209067casa sola en toluca zinacantepec con credito i...casa en privada con caseta de vigilancia casas...CasaIGNACIO MANUEL ALTAMIRANO 128ZinacantepecEdo. de México1.02.01.0...-99.6880152012-03-09 00:00:000.00.00.01.01.0650000.024
4185997paseos del solbonito departamento en excelentes condiciones ...ApartamentoPASEOS DEL SOLZapopanJalisco10.02.01.0...NaN2016-06-07 00:00:000.00.00.00.00.01150000.013
5126147departamento en venta taxqueñaamplio departamento, estancia de sala y comedo...ApartamentoCondominio Tlalpan 2BCoyoacánDistrito Federal5.02.01.0...-99.1484752014-03-18 00:00:000.00.00.00.01.01100000.031
6139233de oportunidad casa en san lorenzoubicada en esquina, pertenece san lorenzo agen...CasaNaNOaxaca de JuárezOaxacaNaN3.01.0...-96.8035042016-02-23 00:00:000.00.00.00.00.01150000.018
75013casa emilia en venta en selvamar playa del carmencasa emilia en venta playa del carmenfracciona...Casacondominio el trebolPlaya del CarmenQuintana Roo2.04.02.0...-87.0379682016-10-20 00:00:000.00.00.00.00.04200000.016
844962pre- venta preciosos depas 2 recamaras con sub...<p>pre-venta de preciosos departamento ecologi...ApartamentoBUENAVISTA DEPTOS CON SUBSIDIOVilla de AlvarezColima1.02.01.0...NaN2014-01-06 00:00:000.00.00.01.01.0310000.017
9134537terrenoterreno de 5.500m2 bardeado, uso de suelo h-20...TerrenoAv. MorelosIxtapalucaEdo. de MéxicoNaNNaNNaN...-98.8870002016-12-22 00:00:000.00.00.00.00.06200000.014
1051180mi 2° credito<p>mi 2&deg; credito<br />para todos aquellos ...CasaMI 2° CREDITOSan Luis PotosíSan luis Potosí0.03.01.0...-101.0189562013-01-03 00:00:000.00.00.01.01.0488000.0110
11103293departamento en venta, san pedro garza garcia,...departamento nuevo ,256 m2 de construccion,un ...ApartamentoNaNSan Pedro Garza GarcíaNuevo LeónNaN3.02.0...NaN2014-12-01 00:00:000.00.00.00.00.07200000.015
1247890hermosa casa en zibata queretaro 170m²<p><strong>casa </strong>en venta en fracciona...CasaPitahayas 2 - 58, ZibataQuerétaroQuerétaro0.03.02.0...-100.3175032013-08-12 00:00:000.01.00.01.01.01500000.042
13130216casa matias romero - sión bancariaflamante casa habitación en condominio horizon...CasaMATIAS ROMERO, COLONIA DEL VALLEBenito JuárezDistrito Federal20.0NaN0.0...-99.1495192015-03-31 00:00:000.00.00.00.00.03000000.011
14181436casa en venta en valle realpre venta de residencia en la toscana con exce...CasaNaNZapopanJaliscoNaNNaN2.0...-103.4319932014-11-25 00:00:000.00.00.00.00.05300000.023
15201923casa en venta lomas verdes naucalpan<p>casa en dos niveles, 3 recámaras la princip...CasaNaNNaucalpan de JuárezEdo. de MéxicoNaN3.02.0...-99.2560282015-01-13 00:00:000.00.00.00.00.03650000.024
16283945preciosa casa en cumbres del lago \t<p>clave: vcln2450 fecha de actualizaci&oacute...CasaCUMBRES DEL LAGO JURIQUILLA, QUERETARO, MEXICOQuerétaroQuerétaro0.03.02.0...-100.4593032013-03-06 00:00:000.00.00.01.01.02450000.042
1773348oportunidad, departamento col del valle, 3 rec...magnifico departamento con excelente distribuc...ApartamentoPazaje Santa CruzBenito JuárezDistrito Federal5.03.02.0...NaN2016-10-30 00:00:000.00.00.01.01.04750000.031
1840421últimos terrenos en preventa!! en bukara coto ...terreno ubicado en bukara coto club, el cual s...TerrenoMilenioQuerétaroQuerétaroNaNNaN0.0...NaN2016-12-03 00:00:000.00.01.01.01.0794078.012
1987655NaN-- rcv151104-lv-27 -- linda casa con area d...CasaSENDA CELESTIAL 10QuerétaroQuerétaro0.03.02.0...NaN2016-01-15 00:00:000.00.00.00.00.01850000.042
2090071terreno residencial en venta en cumbres quinta...<p>excelente terreno plano, frente a parque en...TerrenoTERRENO RESIDENCIAL EN VENTA EN CUMBRESMonterreyNuevo León0.0NaN0.0...NaN2013-12-02 00:00:000.00.00.00.00.0490000.015
21224513casa en venta en querétaronocnok id: mx15-bk0340. hermosa casa en conjun...Casa en condominioNaNQuerétaroQuerétaroNaN3.02.0...NaN2016-02-01 00:00:000.00.00.00.00.02300000.012
22129818comoda casa con recamara en planta baja y ampl...<p>comoda casa con recamara en planta baja y a...CasaNaNCiudad MaderoTamaulipas0.04.0NaN...-97.8410122013-12-12 00:00:000.00.00.00.00.01340000.0111
23146699casa en condominio en venta, benito juarez, ca...casa en venta en cancun en residencial cumbres...Casa en condominioNaNCancúnQuintana Roo4.03.02.0...-86.8463232015-05-19 00:00:000.00.00.00.00.03500000.016
24235958casa en venta - colinas del cimatario, queréta...<p>propiedad en un nivel, con espacios amplios...CasaColinas del Cimatario C721QuerétaroQuerétaro4.03.0NaN...-100.3673822013-09-26 00:00:000.00.00.01.01.03900000.042
2597918casa venta santa ceciliahermosa casa en santa cecilia en apodaca, cerc...CasaLopez VelardeApodacaNuevo León8.03.02.0...NaN2016-12-31 00:00:000.00.00.01.01.0850000.015
2670294casa en venta de un nivel , frac. misiones de ...¡excelente oportunidad! \n\ncasa de un nivel ...Casa en condominioMISIONES DE SAN FRANCISCOPueblaPuebla3.02.01.0...NaN2014-07-22 00:00:000.00.00.01.01.0310000.019
27177031NaNhermosa casa remodelada, con acabados de lujo,...Casa en condominioPASEO DE LOS LAURELESMiguel HidalgoDistrito Federal18.03.03.0...NaN2012-03-07 00:00:000.00.00.00.00.010800000.021
28144635casa en venta en santillana icasa en coto 3 recamaras, estudio, sala-comedo...Casa en condominioCoto Santillana I Casa G11ZapopanJalisco1.03.02.0...-103.4538082015-01-20 00:00:000.00.00.00.00.02600000.013
29146158terreno urbano bardeado zona tesistánen zona de urbanización progresiva, terreno pl...Terreno comercialcamino viejo a TesistanZapopanJalisco20.0NaN0.0...-103.4770622016-12-03 00:00:000.00.00.01.00.010707900.013
-

30 rows × 25 columns

-
-
- -
-
From be2ee665b927a9dea3d30aa2d33addc2e2be82dd Mon Sep 17 00:00:00 2001 From: tlofano Date: Tue, 26 Nov 2019 19:18:43 -0300 Subject: [PATCH 7/8] Correccion en columnas categoricas --- features_complementarias.ipynb | 645 ++++++++++++++++++++--- html/features_complementarias.html | 803 +++++++++++++++++++++++++++-- 2 files changed, 1348 insertions(+), 100 deletions(-) diff --git a/features_complementarias.ipynb b/features_complementarias.ipynb index d3bec33..79a7d7f 100644 --- a/features_complementarias.ipynb +++ b/features_complementarias.ipynb @@ -2,19 +2,20 @@ "cells": [ { "cell_type": "code", - "execution_count": 3, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import ipynb.fs.full.features as features_nltk\n", "\n", - "df_train = pd.read_csv('./data/train.csv')" + "# df_train = pd.read_csv('./data/train.csv')\n", + "# df_test = pd.read_csv('./data/test.csv')" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -23,7 +24,10 @@ " '''Ver referencia columna_a_ohe para los ultimos 3 parametros'''\n", " \n", " def limpiar_espacios_blanco(texto):\n", - " return texto.replace(' ', '_')\n", + " try:\n", + " return texto.replace(' ', '_')\n", + " except:\n", + " pass\n", " \n", " df = df.copy()\n", " df[columna1] = df[columna1].apply(limpiar_espacios_blanco)\n", @@ -36,7 +40,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -61,7 +65,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -82,7 +86,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -119,109 +123,628 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 6, "metadata": { "scrolled": false }, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['id', 
'titulo', 'descripcion', 'tipodepropiedad', 'direccion', 'ciudad',\n", - " 'provincia', 'antiguedad', 'habitaciones', 'garages', 'banos',\n", - " 'metroscubiertos', 'metrostotales', 'idzona', 'lat', 'lng', 'fecha',\n", - " 'gimnasio', 'usosmultiples', 'piscina', 'escuelascercanas',\n", - " 'centroscomercialescercanos', 'precio'],\n", - " dtype='object')" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/tomas/Facultad/datos/datos-tp2/.venv/lib/python3.6/site-packages/ipykernel_launcher.py:8: SettingWithCopyWarning: \n", - "A value is trying to be set on a copy of a slice from a DataFrame.\n", - "Try using .loc[row_indexer,col_indexer] = value instead\n", - "\n", - "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", - " \n", - "/home/tomas/Facultad/datos/datos-tp2/.venv/lib/python3.6/site-packages/ipykernel_launcher.py:28: SettingWithCopyWarning: \n", - "A value is trying to be set on a copy of a slice from a DataFrame.\n", - "Try using .loc[row_indexer,col_indexer] = value instead\n", - "\n", - "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n" - ] - } - ], + "outputs": [], "source": [ - "df = df_train.head()\n", - "# df = df_train\n", - "\n", - "#Ejs:\n", - "# concatenar_categoricos(df, 'tipodepropiedad', 'provincia')\n", - "# cantidad_propiedad_misma_cantidad(df, 'tipodepropiedad', 'provincia')\n", - "# cantidad_atributo(df, 'provincia')\n", - "# ranking_atributo(df, 'provincia')\n", - "\n", - "def aplicar_features(df):\n", + "def aplicar_features(df_recibido):\n", + " df = df_recibido.copy() \n", " df = concatenar_categoricos(df, 'tipodepropiedad', 'provincia')\n", " df = concatenar_categoricos(df, 'provincia', 'ciudad')\n", " # -- \n", - " display('Check-1')\n", + " display('Check-1.0')\n", " df = 
cantidad_atributo(df, 'provincia')\n", + " display('Check-1.1')\n", " df = cantidad_atributo(df, 'ciudad')\n", + " display('Check-1.2')\n", " df = cantidad_atributo(df, 'antiguedad')\n", + " display('Check-1.3')\n", " df = cantidad_atributo(df, 'habitaciones')\n", + " display('Check-1.4')\n", " df = cantidad_atributo(df, 'garages')\n", + " display('Check-1.5')\n", " df = cantidad_atributo(df, 'banos')\n", + " display('Check-1.6')\n", " df = cantidad_atributo(df, 'gimnasio')\n", + " display('Check-1.7')\n", " df = cantidad_atributo(df, 'usosmultiples')\n", + " display('Check-1.8')\n", " df = cantidad_atributo(df, 'escuelascercanas')\n", + " display('Check-1.9')\n", " df = cantidad_atributo(df, 'centroscomercialescercanos')\n", + " display('Check-1.10')\n", " df = cantidad_atributo(df, 'piscina')\n", " # --\n", - " display('Check-2')\n", + " display('Check-2.0')\n", " df = ranking_atributo(df, 'provincia')\n", + " display('Check-2.1')\n", " df = ranking_atributo(df, 'tipodepropiedad')\n", + " display('Check-2.2')\n", " df = ranking_atributo(df, 'ciudad')\n", + " display('Check-2.3')\n", " df = ranking_atributo(df, 'antiguedad')\n", + " display('Check-2.4')\n", " df = ranking_atributo(df, 'habitaciones')\n", + " display('Check-2.5')\n", " df = ranking_atributo(df, 'garages')\n", + " display('Check-2.6')\n", " df = ranking_atributo(df, 'banos')\n", - " df = ranking_atributo(df, 'gimasio')\n", + " display('Check-2.7')\n", + " df = ranking_atributo(df, 'gimnasio')\n", + " display('Check-2.8')\n", " df = ranking_atributo(df, 'usosmultiples')\n", + " display('Check-2.9')\n", " df = ranking_atributo(df, 'escuelascercanas')\n", - " df = ranking_atributo(df, 'centrocomercialescercanos')\n", + " display('Check-2.10')\n", + " df = ranking_atributo(df, 'centroscomercialescercanos') \n", " # --\n", - " display('Check-3')\n", + " display('Check-3.0')\n", " df = cantidad_propiedad_misma_cantidad(df, 'tipodepropiedad', 'provincia')\n", + " display('Check-3.1')\n", " df = 
cantidad_propiedad_misma_cantidad(df, 'tipodepropiedad', 'ciudad')\n", + " display('Check-3.2')\n", " df = cantidad_propiedad_misma_cantidad(df, 'tipodepropiedad', 'antiguedad')\n", + " display('Check-3.3')\n", " df = cantidad_propiedad_misma_cantidad(df, 'tipodepropiedad', 'habitaciones')\n", + " display('Check-3.4')\n", " df = cantidad_propiedad_misma_cantidad(df, 'tipodepropiedad', 'garages')\n", + " display('Check-3.5')\n", " df = cantidad_propiedad_misma_cantidad(df, 'tipodepropiedad', 'banos')\n", + " display('Check-3.6')\n", " df = cantidad_propiedad_misma_cantidad(df, 'tipodepropiedad', 'gimnasio')\n", + " display('Check-3.7')\n", " df = cantidad_propiedad_misma_cantidad(df, 'tipodepropiedad', 'usosmultiples')\n", - " df = cantidad_propiedad_misma_cantidad(df, 'tipodepropiedad', 'pisicina')\n", - " display('Check-4')\n", + " display('Check-3.8')\n", + " df = cantidad_propiedad_misma_cantidad(df, 'tipodepropiedad', 'piscina')\n", + " display('Check-4.0')\n", " df = cantidad_propiedad_misma_cantidad(df, 'ciudad', 'antiguedad')\n", + " display('Check-4.1')\n", " df = cantidad_propiedad_misma_cantidad(df, 'ciudad', 'habitaciones')\n", + " display('Check-4.2')\n", " df = cantidad_propiedad_misma_cantidad(df, 'ciudad', 'garages')\n", + " display('Check-4.3')\n", " df = cantidad_propiedad_misma_cantidad(df, 'ciudad', 'banos')\n", + " display('Check-4.4')\n", " df = cantidad_propiedad_misma_cantidad(df, 'ciudad', 'gimnasio')\n", + " display('Check-4.5')\n", " df = cantidad_propiedad_misma_cantidad(df, 'ciudad', 'usosmultiples')\n", - " df = cantidad_propiedad_misma_cantidad(df, 'ciudad', 'ciudad')\n", - " display('Check-5')\n", + " display('Check-5.6')\n", " df = cantidad_propiedad_misma_cantidad(df, 'provincia', 'antiguedad')\n", + " display('Check-5.7')\n", " df = cantidad_propiedad_misma_cantidad(df, 'provincia', 'habitaciones')\n", + " display('Check-5.8')\n", " df = cantidad_propiedad_misma_cantidad(df, 'provincia', 'garages')\n", + " display('Check-5.9')\n", 
" df = cantidad_propiedad_misma_cantidad(df, 'provincia', 'banos')\n", + " display('Check-5.10')\n", " df = cantidad_propiedad_misma_cantidad(df, 'provincia', 'gimnasio')\n", + " display('Check-5.11')\n", " df = cantidad_propiedad_misma_cantidad(df, 'provincia', 'usosmultiples')\n", - " df = cantidad_propiedad_misma_cantidad(df, 'provincia', 'pisina')" + " display('Check-5.12')\n", + " df = cantidad_propiedad_misma_cantidad(df, 'provincia', 'piscina')\n", + " display('Check-6.0')\n", + " return df" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['id', 'titulo', 'descripcion', 'tipodepropiedad', 'direccion', 'ciudad',\n", + " 'provincia', 'antiguedad', 'habitaciones', 'garages', 'banos',\n", + " 'metroscubiertos', 'metrostotales', 'idzona', 'lat', 'lng', 'fecha',\n", + " 'gimnasio', 'usosmultiples', 'piscina', 'escuelascercanas',\n", + " 'centroscomercialescercanos', 'precio'],\n", + " dtype='object')" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "(240000, 23)" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'Check-1.0'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'Check-1.1'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'Check-1.2'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'Check-1.3'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'Check-1.4'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'Check-1.5'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'Check-1.6'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + 
"text/plain": [ + "'Check-1.7'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'Check-1.8'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'Check-1.9'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'Check-1.10'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'Check-2.0'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'Check-2.1'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'Check-2.2'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'Check-2.3'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'Check-2.4'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'Check-2.5'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'Check-2.6'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'Check-2.7'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'Check-2.8'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'Check-2.9'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'Check-2.10'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'Check-3.0'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'Check-3.1'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'Check-3.2'" + ] + }, + "metadata": {}, 
+ "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'Check-3.3'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'Check-3.4'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'Check-3.5'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'Check-3.6'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'Check-3.7'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'Check-3.8'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'Check-4.0'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'Check-4.1'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'Check-4.2'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'Check-4.3'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'Check-4.4'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'Check-4.5'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'Check-5.6'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'Check-5.7'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'Check-5.8'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'Check-5.9'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'Check-5.10'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + 
"text/plain": [ + "'Check-5.11'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'Check-5.12'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'Check-6.0'" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df = df_train\n", + "# df = df_test\n", + "\n", + "display(df.columns)\n", + "display(df.shape)\n", + "\n", + "#Ejs:\n", + "# concatenar_categoricos(df, 'tipodepropiedad', 'provincia')\n", + "# cantidad_propiedad_misma_cantidad(df, 'tipodepropiedad', 'provincia')\n", + "# cantidad_atributo(df, 'provincia')\n", + "# ranking_atributo(df, 'provincia')\n", + "\n", + "df = aplicar_features(df)\n", + "# df.to_csv('masfeatures_train.csv', index=False)\n", + "# df.to_csv('masfeatures_test.csv', index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Las columnas de tipo Object:'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "['titulo',\n", + " 'descripcion',\n", + " 'tipodepropiedad',\n", + " 'direccion',\n", + " 'ciudad',\n", + " 'provincia',\n", + " 'fecha',\n", + " 'tipodepropiedad_provincia',\n", + " 'provincia_ciudad']" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df_train = pd.read_csv('./masfeatures_train.csv')\n", + "df_eval = pd.read_csv('./masfeatures_test.csv')\n", + "\n", + "\n", + "# Correccion csv - strings que deberian ser floats\n", + "df_train['antiguedad'] = df_train['antiguedad'].apply(lambda antiguedad: None if antiguedad == 'otros' else antiguedad)\n", + "df_eval['antiguedad'] = df_eval['antiguedad'].apply(lambda antiguedad: None if antiguedad == 'otros' else antiguedad)\n", + "df_eval['antiguedad'] = df_eval['antiguedad'].astype(float)\n", + "df_train['antiguedad'] = df_train['antiguedad'].astype(float)\n", + "\n", + 
"df_train['habitaciones'] = df_train['habitaciones'].apply(lambda x: None if x == 'otros' else x)\n", + "df_eval['habitaciones'] = df_eval['habitaciones'].apply(lambda x: None if x == 'otros' else x)\n", + "df_eval['habitaciones'] = df_eval['habitaciones'].astype(float)\n", + "df_train['habitaciones'] = df_train['habitaciones'].astype(float)\n", + "\n", + "df_train['garages'] = df_train['garages'].apply(lambda x: None if x == 'otros' else x)\n", + "df_eval['garages'] = df_eval['garages'].apply(lambda x: None if x == 'otros' else x)\n", + "df_eval['garages'] = df_eval['garages'].astype(float)\n", + "df_train['garages'] = df_train['garages'].astype(float)\n", + "\n", + "df_train['banos'] = df_train['banos'].apply(lambda x: None if x == 'otros' else x)\n", + "df_eval['banos'] = df_eval['banos'].apply(lambda x: None if x == 'otros' else x)\n", + "df_eval['banos'] = df_eval['banos'].astype(float)\n", + "df_train['banos'] = df_train['banos'].astype(float)\n", + "\n", + "# Las columnas de tipo object\n", + "x = df_train.columns.to_series().groupby(df_train.dtypes).groups\n", + "display(\"Las columnas de tipo Object:\")\n", + "display(list(x[list(x.keys())[len(list(x.keys()))-1]]))\n", + "\n", + "df_eval.to_csv('masfeatures_train_f.csv', index=False)\n", + "df_train.to_csv('masfeatures_test_f.csv', index=False)" ] } ], diff --git a/html/features_complementarias.html b/html/features_complementarias.html index 5b8e540..a958235 100644 --- a/html/features_complementarias.html +++ b/html/features_complementarias.html @@ -13076,13 +13076,14 @@
-
In [3]:
+
In [1]:
import pandas as pd
 import ipynb.fs.full.features as features_nltk
 
-df_train = pd.read_csv('./data/train.csv')
+# df_train = pd.read_csv('./data/train.csv')
+# df_test = pd.read_csv('./data/test.csv')
 
@@ -13092,7 +13093,7 @@
-
In [4]:
+
In [2]:
def concatenar_categoricos(df, columna1, columna2, N=0, df_aux=None, devolver_cols=False):
@@ -13100,7 +13101,10 @@
     '''Ver referencia columna_a_ohe para los ultimos 3 parametros'''
     
     def limpiar_espacios_blanco(texto):
-        return texto.replace(' ', '_')
+        try:
+            return texto.replace(' ', '_')
+        except:
+            pass
         
     df = df.copy()
     df[columna1] = df[columna1].apply(limpiar_espacios_blanco)
@@ -13118,7 +13122,7 @@
 
-
In [5]:
+
In [3]:
def cantidad_propiedad_misma_cantidad(df, agrupar_col, misma_cantidad_col):
@@ -13147,7 +13151,7 @@
 
-
In [6]:
+
In [4]:
def cantidad_atributo(df, col):
@@ -13172,7 +13176,7 @@
 
-
In [7]:
+
In [5]:
def ranking_atributo(df, col):
@@ -13213,74 +13217,133 @@
 
-
In [13]:
+
In [6]:
-
df = df_train.head()
-# df = df_train
-
-#Ejs:
-# concatenar_categoricos(df, 'tipodepropiedad', 'provincia')
-# cantidad_propiedad_misma_cantidad(df, 'tipodepropiedad', 'provincia')
-# cantidad_atributo(df, 'provincia')
-# ranking_atributo(df, 'provincia')
-
-def aplicar_features(df):
+
def aplicar_features(df_recibido):
+    df = df_recibido.copy()    
     df = concatenar_categoricos(df, 'tipodepropiedad', 'provincia')
     df = concatenar_categoricos(df, 'provincia', 'ciudad')
     # -- 
-    display('Check-1')
+    display('Check-1.0')
     df = cantidad_atributo(df, 'provincia')
+    display('Check-1.1')
     df = cantidad_atributo(df, 'ciudad')
+    display('Check-1.2')
     df = cantidad_atributo(df, 'antiguedad')
+    display('Check-1.3')
     df = cantidad_atributo(df, 'habitaciones')
+    display('Check-1.4')
     df = cantidad_atributo(df, 'garages')
+    display('Check-1.5')
     df = cantidad_atributo(df, 'banos')
+    display('Check-1.6')
     df = cantidad_atributo(df, 'gimnasio')
+    display('Check-1.7')
     df = cantidad_atributo(df, 'usosmultiples')
+    display('Check-1.8')
     df = cantidad_atributo(df, 'escuelascercanas')
+    display('Check-1.9')
     df = cantidad_atributo(df, 'centroscomercialescercanos')
+    display('Check-1.10')
     df = cantidad_atributo(df, 'piscina')
     # --
-    display('Check-2')
+    display('Check-2.0')
     df = ranking_atributo(df, 'provincia')
+    display('Check-2.1')
     df = ranking_atributo(df, 'tipodepropiedad')
+    display('Check-2.2')
     df = ranking_atributo(df, 'ciudad')
+    display('Check-2.3')
     df = ranking_atributo(df, 'antiguedad')
+    display('Check-2.4')
     df = ranking_atributo(df, 'habitaciones')
+    display('Check-2.5')
     df = ranking_atributo(df, 'garages')
+    display('Check-2.6')
     df = ranking_atributo(df, 'banos')
-    df = ranking_atributo(df, 'gimasio')
+    display('Check-2.7')
+    df = ranking_atributo(df, 'gimnasio')
+    display('Check-2.8')
     df = ranking_atributo(df, 'usosmultiples')
+    display('Check-2.9')
     df = ranking_atributo(df, 'escuelascercanas')
-    df = ranking_atributo(df, 'centrocomercialescercanos')
+    display('Check-2.10')
+    df = ranking_atributo(df, 'centroscomercialescercanos')    
     # --
-    display('Check-3')
+    display('Check-3.0')
     df = cantidad_propiedad_misma_cantidad(df, 'tipodepropiedad', 'provincia')
+    display('Check-3.1')
     df = cantidad_propiedad_misma_cantidad(df, 'tipodepropiedad', 'ciudad')
+    display('Check-3.2')
     df = cantidad_propiedad_misma_cantidad(df, 'tipodepropiedad', 'antiguedad')
+    display('Check-3.3')
     df = cantidad_propiedad_misma_cantidad(df, 'tipodepropiedad', 'habitaciones')
+    display('Check-3.4')
     df = cantidad_propiedad_misma_cantidad(df, 'tipodepropiedad', 'garages')
+    display('Check-3.5')
     df = cantidad_propiedad_misma_cantidad(df, 'tipodepropiedad', 'banos')
+    display('Check-3.6')
     df = cantidad_propiedad_misma_cantidad(df, 'tipodepropiedad', 'gimnasio')
+    display('Check-3.7')
     df = cantidad_propiedad_misma_cantidad(df, 'tipodepropiedad', 'usosmultiples')
-    df = cantidad_propiedad_misma_cantidad(df, 'tipodepropiedad', 'pisicina')
-    display('Check-4')
+    display('Check-3.8')
+    df = cantidad_propiedad_misma_cantidad(df, 'tipodepropiedad', 'piscina')
+    display('Check-4.0')
     df = cantidad_propiedad_misma_cantidad(df, 'ciudad', 'antiguedad')
+    display('Check-4.1')
     df = cantidad_propiedad_misma_cantidad(df, 'ciudad', 'habitaciones')
+    display('Check-4.2')
     df = cantidad_propiedad_misma_cantidad(df, 'ciudad', 'garages')
+    display('Check-4.3')
     df = cantidad_propiedad_misma_cantidad(df, 'ciudad', 'banos')
+    display('Check-4.4')
     df = cantidad_propiedad_misma_cantidad(df, 'ciudad', 'gimnasio')
+    display('Check-4.5')
     df = cantidad_propiedad_misma_cantidad(df, 'ciudad', 'usosmultiples')
-    df = cantidad_propiedad_misma_cantidad(df, 'ciudad', 'ciudad')
-    display('Check-5')
+    display('Check-5.6')
     df = cantidad_propiedad_misma_cantidad(df, 'provincia', 'antiguedad')
+    display('Check-5.7')
     df = cantidad_propiedad_misma_cantidad(df, 'provincia', 'habitaciones')
+    display('Check-5.8')
     df = cantidad_propiedad_misma_cantidad(df, 'provincia', 'garages')
+    display('Check-5.9')
     df = cantidad_propiedad_misma_cantidad(df, 'provincia', 'banos')
+    display('Check-5.10')
     df = cantidad_propiedad_misma_cantidad(df, 'provincia', 'gimnasio')
+    display('Check-5.11')
     df = cantidad_propiedad_misma_cantidad(df, 'provincia', 'usosmultiples')
-    df = cantidad_propiedad_misma_cantidad(df, 'provincia', 'pisina')
+    display('Check-5.12')
+    df = cantidad_propiedad_misma_cantidad(df, 'provincia', 'piscina')
+    display('Check-6.0')
+    return df
+
+ +
+
+
+ +
+
+
+
In [7]:
+
+
+
df = df_train
+# df = df_test
+
+display(df.columns)
+display(df.shape)
+
+#Ejs:
+# concatenar_categoricos(df, 'tipodepropiedad', 'provincia')
+# cantidad_propiedad_misma_cantidad(df, 'tipodepropiedad', 'provincia')
+# cantidad_atributo(df, 'provincia')
+# ranking_atributo(df, 'provincia')
+
+df = aplicar_features(df)
+# df.to_csv('masfeatures_train.csv', index=False)
+# df.to_csv('masfeatures_test.csv', index=False)
 
@@ -13314,20 +13377,682 @@
-
-
/home/tomas/Facultad/datos/datos-tp2/.venv/lib/python3.6/site-packages/ipykernel_launcher.py:8: SettingWithCopyWarning: 
-A value is trying to be set on a copy of a slice from a DataFrame.
-Try using .loc[row_indexer,col_indexer] = value instead
 
-See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
-  
-/home/tomas/Facultad/datos/datos-tp2/.venv/lib/python3.6/site-packages/ipykernel_launcher.py:28: SettingWithCopyWarning: 
-A value is trying to be set on a copy of a slice from a DataFrame.
-Try using .loc[row_indexer,col_indexer] = value instead
 
-See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
-
+
+
(240000, 23)
+
+ +
+ +
+ +
+ + + + +
+
'Check-1.0'
+
+ +
+ +
+ +
+ + + + +
+
'Check-1.1'
+
+ +
+ +
+ +
+ + + + +
+
'Check-1.2'
+
+ +
+ +
+ +
+ + + + +
+
'Check-1.3'
+
+ +
+ +
+ +
+ + + + +
+
'Check-1.4'
+
+ +
+ +
+ +
+ + + + +
+
'Check-1.5'
+
+ +
+ +
+ +
+ + + + +
+
'Check-1.6'
+
+ +
+ +
+ +
+ + + + +
+
'Check-1.7'
+
+ +
+ +
+ +
+ + + + +
+
'Check-1.8'
+
+ +
+ +
+ +
+ + + + +
+
'Check-1.9'
+
+ +
+ +
+ +
+ + + + +
+
'Check-1.10'
+
+ +
+ +
+ +
+ + + + +
+
'Check-2.0'
+
+ +
+ +
+ +
+ + + + +
+
'Check-2.1'
+
+ +
+ +
+ +
+ + + + +
+
'Check-2.2'
+
+ +
+ +
+ +
+ + + + +
+
'Check-2.3'
+
+ +
+ +
+ +
+ + + + +
+
'Check-2.4'
+
+ +
+ +
+ +
+ + + + +
+
'Check-2.5'
+
+ +
+ +
+ +
+ + + + +
+
'Check-2.6'
+
+
+ +
+ +
+ + + + +
+
'Check-2.7'
+
+ +
+ +
+ +
+ + + + +
+
'Check-2.8'
+
+ +
+ +
+ +
+ + + + +
+
'Check-2.9'
+
+ +
+ +
+ +
+ + + + +
+
'Check-2.10'
+
+ +
+ +
+ +
+ + + + +
+
'Check-3.0'
+
+ +
+ +
+ +
+ + + + +
+
'Check-3.1'
+
+ +
+ +
+ +
+ + + + +
+
'Check-3.2'
+
+ +
+ +
+ +
+ + + + +
+
'Check-3.3'
+
+ +
+ +
+ +
+ + + + +
+
'Check-3.4'
+
+ +
+ +
+ +
+ + + + +
+
'Check-3.5'
+
+ +
+ +
+ +
+ + + + +
+
'Check-3.6'
+
+ +
+ +
+ +
+ + + + +
+
'Check-3.7'
+
+ +
+ +
+ +
+ + + + +
+
'Check-3.8'
+
+ +
+ +
+ +
+ + + + +
+
'Check-4.0'
+
+ +
+ +
+ +
+ + + + +
+
'Check-4.1'
+
+ +
+ +
+ +
+ + + + +
+
'Check-4.2'
+
+ +
+ +
+ +
+ + + + +
+
'Check-4.3'
+
+ +
+ +
+ +
+ + + + +
+
'Check-4.4'
+
+ +
+ +
+ +
+ + + + +
+
'Check-4.5'
+
+ +
+ +
+ +
+ + + + +
+
'Check-5.6'
+
+ +
+ +
+ +
+ + + + +
+
'Check-5.7'
+
+ +
+ +
+ +
+ + + + +
+
'Check-5.8'
+
+ +
+ +
+ +
+ + + + +
+
'Check-5.9'
+
+ +
+ +
+ +
+ + + + +
+
'Check-5.10'
+
+ +
+ +
+ +
+ + + + +
+
'Check-5.11'
+
+ +
+ +
+ +
+ + + + +
+
'Check-5.12'
+
+ +
+ +
+ +
+ + + + +
+
'Check-6.0'
+
+ +
+ +
+
+ +
+
+
+
In [3]:
+
+
+
df_train = pd.read_csv('./masfeatures_train.csv')
+df_eval = pd.read_csv('./masfeatures_test.csv')
+
+
+# CSV fix-up: these columns should be floats but can contain the string
+# 'otros'; turn that marker into a missing value and cast to float.
+# The same conversion is applied to the train and the evaluation frame.
+for col in ('antiguedad', 'habitaciones', 'garages', 'banos'):
+    for frame in (df_train, df_eval):
+        frame[col] = frame[col].apply(lambda v: None if v == 'otros' else v).astype(float)
+
+# List the columns that are still of type object. select_dtypes picks them
+# by dtype directly instead of relying on the order of a dtype group dict.
+display("Las columnas de tipo Object:")
+display(list(df_train.select_dtypes(include='object').columns))
+
+# Write each frame to its own "_f" file: train -> train, eval -> test
+# (the two targets were swapped before).
+df_train.to_csv('masfeatures_train_f.csv', index=False)
+df_eval.to_csv('masfeatures_test_f.csv', index=False)
+
+ +
+
+
+ +
+
+ + +
+ +
+ + + + +
+
'Las columnas de tipo Object:'
+
+ +
+ +
+ +
+ + + + +
+
['titulo',
+ 'descripcion',
+ 'tipodepropiedad',
+ 'direccion',
+ 'ciudad',
+ 'provincia',
+ 'fecha',
+ 'tipodepropiedad_provincia',
+ 'provincia_ciudad']
+
+
From 85b4d40eb6aa56999a1e0a6be91fbdad5a4affa0 Mon Sep 17 00:00:00 2001 From: tlofano Date: Sun, 1 Dec 2019 17:40:15 -0300 Subject: [PATCH 8/8] Sin ohe --- features_complementarias.ipynb | 1524 +++++++++++++++++++--------- html/features_complementarias.html | 1382 ++++++++++++++----------- 2 files changed, 1841 insertions(+), 1065 deletions(-) diff --git a/features_complementarias.ipynb b/features_complementarias.ipynb index 79a7d7f..f379863 100644 --- a/features_complementarias.ipynb +++ b/features_complementarias.ipynb @@ -15,7 +15,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -40,7 +40,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -65,28 +65,41 @@ }, { "cell_type": "code", - "execution_count": 4, - "metadata": {}, + "execution_count": null, + "metadata": { + "scrolled": true + }, "outputs": [], "source": [ - "def cantidad_atributo(df, col):\n", + "def cantidad_atributo(df, col, df_test):\n", " '''Simil a value_counts. 
Dada una columna, agrega al df, cuantas propiedades existen con esa misma propiedad'''\n", + " '''Se rellena en df_test, con los counts que se hicieron en df'''\n", " df_aux = df.copy()\n", " df_aux = df_aux.groupby([col]).agg({'id': 'count'})\n", " df_aux = df_aux.rename(columns={'id':'cantidad'})\n", " df_aux = df_aux.reset_index()\n", + " \n", + " prop_cantidad = {}\n", "\n", " def get_cantidad(col1):\n", " cantidad = df_aux[df_aux[col] == col1]['cantidad']\n", - " return cantidad.values[0] if len(cantidad.values > 0) else 0\n", + " if col1 in prop_cantidad:\n", + " return prop_cantidad[col1]\n", + " cantidad_retornar = cantidad.values[0] if len(cantidad.values > 0) else 0\n", + " prop_cantidad[col1] = cantidad_retornar\n", + " return cantidad_retornar\n", " \n", " df['cantidad_propiedades_en_' + col] = df.apply(lambda x: get_cantidad(x[col]), axis=1)\n", - " return df" + " \n", + " df_test['cantidad_propiedades_en_' + col] = df_test.apply(\n", + " lambda x: prop_cantidad[x[col]] if x[col] in prop_cantidad else 0, axis=1)\n", + " \n", + " return df, df_test" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -123,7 +136,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": { "scrolled": false }, @@ -131,6 +144,7 @@ "source": [ "def aplicar_features(df_recibido):\n", " df = df_recibido.copy() \n", + " # -- \n", " df = concatenar_categoricos(df, 'tipodepropiedad', 'provincia')\n", " df = concatenar_categoricos(df, 'provincia', 'ciudad')\n", " # -- \n", @@ -230,207 +244,83 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df = df_train\n", + "# df = df_test\n", + "\n", + "display(df.columns)\n", + "display(df.shape)\n", + "\n", + "#Ejs:\n", + "# concatenar_categoricos(df, 'tipodepropiedad', 'provincia')\n", + "# cantidad_propiedad_misma_cantidad(df, 'tipodepropiedad', 
'provincia')\n", + "# cantidad_atributo(df, 'provincia')\n", + "# ranking_atributo(df, 'provincia')\n", + "\n", + "df = aplicar_features(df)\n", + "# df.to_csv('masfeatures_train.csv', index=False)\n", + "# df.to_csv('masfeatures_test.csv', index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_train = pd.read_csv('./masfeatures_train.csv')\n", + "df_eval = pd.read_csv('./masfeatures_test.csv')\n", + "\n", + "\n", + "# Correccion csv - strings que deberian ser floats\n", + "df_train['antiguedad'] = df_train['antiguedad'].apply(lambda antiguedad: None if antiguedad == 'otros' else antiguedad)\n", + "df_eval['antiguedad'] = df_eval['antiguedad'].apply(lambda antiguedad: None if antiguedad == 'otros' else antiguedad)\n", + "df_eval['antiguedad'] = df_eval['antiguedad'].astype(float)\n", + "df_train['antiguedad'] = df_train['antiguedad'].astype(float)\n", + "\n", + "df_train['habitaciones'] = df_train['habitaciones'].apply(lambda x: None if x == 'otros' else x)\n", + "df_eval['habitaciones'] = df_eval['habitaciones'].apply(lambda x: None if x == 'otros' else x)\n", + "df_eval['habitaciones'] = df_eval['habitaciones'].astype(float)\n", + "df_train['habitaciones'] = df_train['habitaciones'].astype(float)\n", + "\n", + "df_train['garages'] = df_train['garages'].apply(lambda x: None if x == 'otros' else x)\n", + "df_eval['garages'] = df_eval['garages'].apply(lambda x: None if x == 'otros' else x)\n", + "df_eval['garages'] = df_eval['garages'].astype(float)\n", + "df_train['garages'] = df_train['garages'].astype(float)\n", + "\n", + "df_train['banos'] = df_train['banos'].apply(lambda x: None if x == 'otros' else x)\n", + "df_eval['banos'] = df_eval['banos'].apply(lambda x: None if x == 'otros' else x)\n", + "df_eval['banos'] = df_eval['banos'].astype(float)\n", + "df_train['banos'] = df_train['banos'].astype(float)\n", + "\n", + "# Las columnas de tipo object\n", + "x = 
df_train.columns.to_series().groupby(df_train.dtypes).groups\n", + "display(\"Las columnas de tipo Object:\")\n", + "display(list(x[list(x.keys())[len(list(x.keys()))-1]]))\n", + "\n", + "df_eval.to_csv('masfeatures_train_f.csv', index=False)\n", + "df_train.to_csv('masfeatures_test_f.csv', index=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Las mismas features anteriores, sin ohe - train" + ] + }, + { + "cell_type": "code", + "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "Index(['id', 'titulo', 'descripcion', 'tipodepropiedad', 'direccion', 'ciudad',\n", - " 'provincia', 'antiguedad', 'habitaciones', 'garages', 'banos',\n", - " 'metroscubiertos', 'metrostotales', 'idzona', 'lat', 'lng', 'fecha',\n", - " 'gimnasio', 'usosmultiples', 'piscina', 'escuelascercanas',\n", - " 'centroscomercialescercanos', 'precio'],\n", - " dtype='object')" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "(240000, 23)" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "'Check-1.0'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "'Check-1.1'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "'Check-1.2'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "'Check-1.3'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "'Check-1.4'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "'Check-1.5'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "'Check-1.6'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "'Check-1.7'" - ] - }, - "metadata": {}, - "output_type": 
"display_data" - }, - { - "data": { - "text/plain": [ - "'Check-1.8'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "'Check-1.9'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "'Check-1.10'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "'Check-2.0'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "'Check-2.1'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "'Check-2.2'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "'Check-2.3'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "'Check-2.4'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "'Check-2.5'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "'Check-2.6'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "'Check-2.7'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "'Check-2.8'" + "69" ] }, "metadata": {}, @@ -439,214 +329,39 @@ { "data": { "text/plain": [ - "'Check-2.9'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "'Check-2.10'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "'Check-3.0'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "'Check-3.1'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "'Check-3.2'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "'Check-3.3'" - ] - }, 
- "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "'Check-3.4'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "'Check-3.5'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "'Check-3.6'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "'Check-3.7'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "'Check-3.8'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "'Check-4.0'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "'Check-4.1'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "'Check-4.2'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "'Check-4.3'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "'Check-4.4'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "'Check-4.5'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "'Check-5.6'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "'Check-5.7'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "'Check-5.8'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "'Check-5.9'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "'Check-5.10'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "'Check-5.11'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { 
- "data": { - "text/plain": [ - "'Check-5.12'" + "Index(['id', 'titulo', 'descripcion', 'tipodepropiedad', 'direccion', 'ciudad',\n", + " 'provincia', 'antiguedad', 'habitaciones', 'garages', 'banos',\n", + " 'metroscubiertos', 'metrostotales', 'idzona', 'lat', 'lng', 'fecha',\n", + " 'gimnasio', 'usosmultiples', 'piscina', 'escuelascercanas',\n", + " 'centroscomercialescercanos', 'precio', 'tipodepropiedad_provincia',\n", + " 'provincia_ciudad', 'cantidad_propiedades_en_provincia',\n", + " 'cantidad_propiedades_en_ciudad', 'cantidad_propiedades_en_antiguedad',\n", + " 'cantidad_propiedades_en_habitaciones',\n", + " 'cantidad_propiedades_en_garages', 'cantidad_propiedades_en_banos',\n", + " 'cantidad_propiedades_en_gimnasio',\n", + " 'cantidad_propiedades_en_usosmultiples',\n", + " 'cantidad_propiedades_en_escuelascercanas',\n", + " 'cantidad_propiedades_en_centroscomercialescercanos',\n", + " 'cantidad_propiedades_en_piscina', 'ranking_en_provincia',\n", + " 'ranking_en_tipodepropiedad', 'ranking_en_ciudad',\n", + " 'ranking_en_antiguedad', 'ranking_en_habitaciones',\n", + " 'ranking_en_garages', 'ranking_en_banos', 'ranking_en_gimnasio',\n", + " 'ranking_en_usosmultiples', 'ranking_en_escuelascercanas',\n", + " 'ranking_en_centroscomercialescercanos',\n", + " 'cantidad_tipodepropiedad_provincia', 'cantidad_tipodepropiedad_ciudad',\n", + " 'cantidad_tipodepropiedad_antiguedad',\n", + " 'cantidad_tipodepropiedad_habitaciones',\n", + " 'cantidad_tipodepropiedad_garages', 'cantidad_tipodepropiedad_banos',\n", + " 'cantidad_tipodepropiedad_gimnasio',\n", + " 'cantidad_tipodepropiedad_usosmultiples',\n", + " 'cantidad_tipodepropiedad_piscina', 'cantidad_ciudad_antiguedad',\n", + " 'cantidad_ciudad_habitaciones', 'cantidad_ciudad_garages',\n", + " 'cantidad_ciudad_banos', 'cantidad_ciudad_gimnasio',\n", + " 'cantidad_ciudad_usosmultiples', 'cantidad_provincia_antiguedad',\n", + " 'cantidad_provincia_habitaciones', 'cantidad_provincia_garages',\n", + " 
'cantidad_provincia_banos', 'cantidad_provincia_gimnasio',\n", + " 'cantidad_provincia_usosmultiples', 'cantidad_provincia_piscina'],\n", + " dtype='object')" ] }, "metadata": {}, @@ -654,8 +369,437 @@ }, { "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idtitulodescripciontipodepropiedaddireccionciudadprovinciaantiguedadhabitacionesgarages...cantidad_ciudad_banoscantidad_ciudad_gimnasiocantidad_ciudad_usosmultiplescantidad_provincia_antiguedadcantidad_provincia_habitacionescantidad_provincia_garagescantidad_provincia_banoscantidad_provincia_gimnasiocantidad_provincia_usosmultiplescantidad_provincia_piscina
0254099depto. tipo a-402depto. interior de 80.15m2, consta de sala com...ApartamentoAvenida Division del Norte 2005Benito_JuárezDistrito_FederalNaN2.01.0...612710391103927041210621921423514543805474656163
153461condominio horizontal en venta<p>entre sonora y guerrero, atr&aacute;s del h...Casa_en_condominioAV. MEXICOLa_Magdalena_ContrerasDistrito_Federal10.03.02.0...607184218128904234791858723514543805474656163
2247984casa en venta urbi 3 recamaras tonaladescripcion \\nla mejor ubicacion residencial e...CasaUrbi TonalaTonaláJalisco5.03.02.0...25778377842561049291807527198941943118756
3209067casa sola en toluca zinacantepec con credito i...casa en privada con caseta de vigilancia casas...CasaIGNACIO MANUEL ALTAMIRANO 128ZinacantepecEdo._de_México1.02.01.0...46149148144994061168912013385413854739277
4185997paseos del solbonito departamento en excelentes condiciones ...ApartamentoPASEOS DEL SOLZapopanJalisco10.02.01.0...1704963493062749418851234434198941943118756
..................................................................
239995119879bonita casas de 2 recamaras a 10 minutos del c...vendo casa en bosques de ica residencial a 10 ...CasaBOSQUESZinacantepecEdo._de_México0.02.02.0...46149148564994061432212013385413854739277
239996259178casa en condominio a 10 min. del centro de tolucacasa con un jardin amplio, un cuarto de servic...CasaFiliberto Navas 325TolucaEdo._de_México0.03.03.0...2271932194756492220059769335385413854739277
239997131932nicolas san juandepartamento con excelente ubicación, muy cerc...ApartamentoNicolas San JuanBenito_JuárezDistrito_Federal20.02.01.0...612710391103929323210621921423514543805474656163
239998146867casa sola. javier rojo gomez.casa sola, dividida en cuatro departamentos de...CasaJavier Rojo Gomez 120IztapalapaDistrito_Federal20.04.00.0...99503028932350658472292644105474656163
239999121958departamento en bosques de las lomas / av. st...id:19816, muy bonito e iluminado departamento,...ApartamentoAVE. STIMCuajimalpa_de_MorelosDistrito_Federal1.03.02.0...1244230924902411234791858723514543805474656163
\n", + "

240000 rows × 69 columns

\n", + "
" + ], "text/plain": [ - "'Check-6.0'" + " id titulo \\\n", + "0 254099 depto. tipo a-402 \n", + "1 53461 condominio horizontal en venta \n", + "2 247984 casa en venta urbi 3 recamaras tonala \n", + "3 209067 casa sola en toluca zinacantepec con credito i... \n", + "4 185997 paseos del sol \n", + "... ... ... \n", + "239995 119879 bonita casas de 2 recamaras a 10 minutos del c... \n", + "239996 259178 casa en condominio a 10 min. del centro de toluca \n", + "239997 131932 nicolas san juan \n", + "239998 146867 casa sola. javier rojo gomez. \n", + "239999 121958 departamento en bosques de las lomas / av. st... \n", + "\n", + " descripcion tipodepropiedad \\\n", + "0 depto. interior de 80.15m2, consta de sala com... Apartamento \n", + "1

entre sonora y guerrero, atrás del h... Casa_en_condominio \n", + "2 descripcion \\nla mejor ubicacion residencial e... Casa \n", + "3 casa en privada con caseta de vigilancia casas... Casa \n", + "4 bonito departamento en excelentes condiciones ... Apartamento \n", + "... ... ... \n", + "239995 vendo casa en bosques de ica residencial a 10 ... Casa \n", + "239996 casa con un jardin amplio, un cuarto de servic... Casa \n", + "239997 departamento con excelente ubicación, muy cerc... Apartamento \n", + "239998 casa sola, dividida en cuatro departamentos de... Casa \n", + "239999 id:19816, muy bonito e iluminado departamento,... Apartamento \n", + "\n", + " direccion ciudad \\\n", + "0 Avenida Division del Norte 2005 Benito_Juárez \n", + "1 AV. MEXICO La_Magdalena_Contreras \n", + "2 Urbi Tonala Tonalá \n", + "3 IGNACIO MANUEL ALTAMIRANO 128 Zinacantepec \n", + "4 PASEOS DEL SOL Zapopan \n", + "... ... ... \n", + "239995 BOSQUES Zinacantepec \n", + "239996 Filiberto Navas 325 Toluca \n", + "239997 Nicolas San Juan Benito_Juárez \n", + "239998 Javier Rojo Gomez 120 Iztapalapa \n", + "239999 AVE. STIM Cuajimalpa_de_Morelos \n", + "\n", + " provincia antiguedad habitaciones garages ... \\\n", + "0 Distrito_Federal NaN 2.0 1.0 ... \n", + "1 Distrito_Federal 10.0 3.0 2.0 ... \n", + "2 Jalisco 5.0 3.0 2.0 ... \n", + "3 Edo._de_México 1.0 2.0 1.0 ... \n", + "4 Jalisco 10.0 2.0 1.0 ... \n", + "... ... ... ... ... ... \n", + "239995 Edo._de_México 0.0 2.0 2.0 ... \n", + "239996 Edo._de_México 0.0 3.0 3.0 ... \n", + "239997 Distrito_Federal 20.0 2.0 1.0 ... \n", + "239998 Distrito_Federal 20.0 4.0 0.0 ... \n", + "239999 Distrito_Federal 1.0 3.0 2.0 ... \n", + "\n", + " cantidad_ciudad_banos cantidad_ciudad_gimnasio \\\n", + "0 6127 10391 \n", + "1 607 1842 \n", + "2 257 783 \n", + "3 46 149 \n", + "4 1704 9634 \n", + "... ... ... 
\n", + "239995 46 149 \n", + "239996 227 1932 \n", + "239997 6127 10391 \n", + "239998 99 50 \n", + "239999 1244 2309 \n", + "\n", + " cantidad_ciudad_usosmultiples cantidad_provincia_antiguedad \\\n", + "0 10392 7041 \n", + "1 1812 8904 \n", + "2 778 4256 \n", + "3 148 1449 \n", + "4 9306 2749 \n", + "... ... ... \n", + "239995 148 5649 \n", + "239996 1947 5649 \n", + "239997 10392 9323 \n", + "239998 3028 9323 \n", + "239999 2490 2411 \n", + "\n", + " cantidad_provincia_habitaciones cantidad_provincia_garages \\\n", + "0 21062 19214 \n", + "1 23479 18587 \n", + "2 10492 9180 \n", + "3 9406 11689 \n", + "4 4188 5123 \n", + "... ... ... \n", + "239995 9406 14322 \n", + "239996 22200 5976 \n", + "239997 21062 19214 \n", + "239998 5065 8472 \n", + "239999 23479 18587 \n", + "\n", + " cantidad_provincia_banos cantidad_provincia_gimnasio \\\n", + "0 23514 54380 \n", + "1 23514 54380 \n", + "2 7527 19894 \n", + "3 12013 38541 \n", + "4 4434 19894 \n", + "... ... ... \n", + "239995 12013 38541 \n", + "239996 9335 38541 \n", + "239997 23514 54380 \n", + "239998 2926 4410 \n", + "239999 23514 54380 \n", + "\n", + " cantidad_provincia_usosmultiples cantidad_provincia_piscina \n", + "0 54746 56163 \n", + "1 54746 56163 \n", + "2 19431 18756 \n", + "3 38547 39277 \n", + "4 19431 18756 \n", + "... ... ... 
\n", + "239995 38547 39277 \n", + "239996 38547 39277 \n", + "239997 54746 56163 \n", + "239998 54746 56163 \n", + "239999 54746 56163 \n", + "\n", + "[240000 rows x 69 columns]" ] }, "metadata": {}, @@ -663,32 +807,78 @@ } ], "source": [ - "df = df_train\n", - "# df = df_test\n", + "df_train = pd.read_csv('./data/train.csv')\n", + "df1, features1 = concatenar_categoricos(df_train, 'tipodepropiedad', 'provincia', 0, None, True)\n", + "df2, features2 = concatenar_categoricos(df_train, 'provincia', 'ciudad', 0, None, True)\n", "\n", - "display(df.columns)\n", - "display(df.shape)\n", + "features_borarr = features1 + features2\n", + "df_features_sin_ohe = pd.read_csv('./masfeatures_train_f.csv')\n", + "df_features_sin_ohe = df_features_sin_ohe.drop(features_borarr, axis=1)\n", "\n", - "#Ejs:\n", - "# concatenar_categoricos(df, 'tipodepropiedad', 'provincia')\n", - "# cantidad_propiedad_misma_cantidad(df, 'tipodepropiedad', 'provincia')\n", - "# cantidad_atributo(df, 'provincia')\n", - "# ranking_atributo(df, 'provincia')\n", + "display(len(df_features_sin_ohe.columns))\n", + "display(df_features_sin_ohe.columns)\n", + "display(df_features_sin_ohe)\n", "\n", - "df = aplicar_features(df)\n", - "# df.to_csv('masfeatures_train.csv', index=False)\n", - "# df.to_csv('masfeatures_test.csv', index=False)" + "df_features_sin_ohe.to_csv('masfeatures_train_f_no_ohe.csv', index=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Las mismas features anteriores, sin ohe - train" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "'Las columnas de tipo Object:'" + "68" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "Index(['id', 'titulo', 'descripcion', 'tipodepropiedad', 'direccion', 'ciudad',\n", + " 'provincia', 'antiguedad', 'habitaciones', 'garages', 'banos',\n", + " 'metroscubiertos', 'metrostotales', 
'idzona', 'lat', 'lng', 'fecha',\n", + " 'gimnasio', 'usosmultiples', 'piscina', 'escuelascercanas',\n", + " 'centroscomercialescercanos', 'tipodepropiedad_provincia',\n", + " 'provincia_ciudad', 'cantidad_propiedades_en_provincia',\n", + " 'cantidad_propiedades_en_ciudad', 'cantidad_propiedades_en_antiguedad',\n", + " 'cantidad_propiedades_en_habitaciones',\n", + " 'cantidad_propiedades_en_garages', 'cantidad_propiedades_en_banos',\n", + " 'cantidad_propiedades_en_gimnasio',\n", + " 'cantidad_propiedades_en_usosmultiples',\n", + " 'cantidad_propiedades_en_escuelascercanas',\n", + " 'cantidad_propiedades_en_centroscomercialescercanos',\n", + " 'cantidad_propiedades_en_piscina', 'ranking_en_provincia',\n", + " 'ranking_en_tipodepropiedad', 'ranking_en_ciudad',\n", + " 'ranking_en_antiguedad', 'ranking_en_habitaciones',\n", + " 'ranking_en_garages', 'ranking_en_banos', 'ranking_en_gimnasio',\n", + " 'ranking_en_usosmultiples', 'ranking_en_escuelascercanas',\n", + " 'ranking_en_centroscomercialescercanos',\n", + " 'cantidad_tipodepropiedad_provincia', 'cantidad_tipodepropiedad_ciudad',\n", + " 'cantidad_tipodepropiedad_antiguedad',\n", + " 'cantidad_tipodepropiedad_habitaciones',\n", + " 'cantidad_tipodepropiedad_garages', 'cantidad_tipodepropiedad_banos',\n", + " 'cantidad_tipodepropiedad_gimnasio',\n", + " 'cantidad_tipodepropiedad_usosmultiples',\n", + " 'cantidad_tipodepropiedad_piscina', 'cantidad_ciudad_antiguedad',\n", + " 'cantidad_ciudad_habitaciones', 'cantidad_ciudad_garages',\n", + " 'cantidad_ciudad_banos', 'cantidad_ciudad_gimnasio',\n", + " 'cantidad_ciudad_usosmultiples', 'cantidad_provincia_antiguedad',\n", + " 'cantidad_provincia_habitaciones', 'cantidad_provincia_garages',\n", + " 'cantidad_provincia_banos', 'cantidad_provincia_gimnasio',\n", + " 'cantidad_provincia_usosmultiples', 'cantidad_provincia_piscina'],\n", + " dtype='object')" ] }, "metadata": {}, @@ -696,16 +886,437 @@ }, { "data": { + "text/html": [ + "

\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idtitulodescripciontipodepropiedaddireccionciudadprovinciaantiguedadhabitacionesgarages...cantidad_ciudad_banoscantidad_ciudad_gimnasiocantidad_ciudad_usosmultiplescantidad_provincia_antiguedadcantidad_provincia_habitacionescantidad_provincia_garagescantidad_provincia_banoscantidad_provincia_gimnasiocantidad_provincia_usosmultiplescantidad_provincia_piscina
04941casa en venta en miguel hidalgo, distrito federal<p>excelente casa estilo moderno.</p>CasaBosque de CedrosMiguel_HidalgoDistrito_Federal29.03.0NaN...54121312553158281643710136681374914136
151775departamentos en venta en montebello<p>departamento una recamara:\\n</p><p>departam...ApartamentoNaNMéridaYucatánNaN1.01.0...2521677179867047123278185419801575
2115253departamento nuevo delegación coyoacán de 87 m...departamento nuevo de 87.06 m2, 1 cajón de est...ApartamentoPueblo de los Reyes, Coyoacán, Mexico D.F.CoyoacánDistrito_Federal0.02.01.0...503124912491999526347245881136681374914136
3299321departamento en venta en acapulco<p> raíces dv001 precioso departamento tipo k...ApartamentoNaNAcapulco_de_JuárezGuerrero2.02.02.0...23056157226234235251604622310
4173570bonita casa sola equipada de dos niveles en lo...<p>casa sola, bonita de dos rec&aacute;maras u...CasaCEDROSTultitlánEdo._de_México10.02.01.0...1532312321958221928872894945494359632
..................................................................
5999575094oportunidad!! se vende amplia casa en col. moc...oportunidad!! ideal para oficina o casa habita...CasaOriente 172 # 265Venustiano_CarranzaDistrito_Federal20.04.03.0...303933952287127315002516136681374914136
59996171847colinas de ecatepeccasa, sala comedor, patio de servicio, buenas ...CasacolinasEcatepec_de_MorelosEdo._de_México10.03.01.0...1725515511958545828873466945494359632
59997138313estrene hermosa casa en sierra morenahermosa casa lista para habitarse ubicada en f...Casas/calleGuadalupeNuevo_León5.03.02.0...180459487585213615461212369838693772
59998271268zen house i venta de linda casa con acabados ...hermosa casa con acabados de lujo en fracciona...CasaZen House lQuerétaroQuerétaro0.02.01.0...12193032301114525685091733406140153766
5999972612casa en chapalita las fuentes/ cercano al club...<p>hermosa casa bien ubicada. cuenta con:</p>\\...CasaFuentes de San Agustín 5051ZapopanJalisco0.03.02.0...828239623151201266022941835497948654690
\n", + "

60000 rows × 68 columns

\n", + "
" + ], "text/plain": [ - "['titulo',\n", - " 'descripcion',\n", - " 'tipodepropiedad',\n", - " 'direccion',\n", - " 'ciudad',\n", - " 'provincia',\n", - " 'fecha',\n", - " 'tipodepropiedad_provincia',\n", - " 'provincia_ciudad']" + " id titulo \\\n", + "0 4941 casa en venta en miguel hidalgo, distrito federal \n", + "1 51775 departamentos en venta en montebello \n", + "2 115253 departamento nuevo delegación coyoacán de 87 m... \n", + "3 299321 departamento en venta en acapulco \n", + "4 173570 bonita casa sola equipada de dos niveles en lo... \n", + "... ... ... \n", + "59995 75094 oportunidad!! se vende amplia casa en col. moc... \n", + "59996 171847 colinas de ecatepec \n", + "59997 138313 estrene hermosa casa en sierra morena \n", + "59998 271268 zen house i venta de linda casa con acabados ... \n", + "59999 72612 casa en chapalita las fuentes/ cercano al club... \n", + "\n", + " descripcion tipodepropiedad \\\n", + "0

excelente casa estilo moderno.

Casa \n", + "1

departamento una recamara:\\n

departam... Apartamento \n", + "2 departamento nuevo de 87.06 m2, 1 cajón de est... Apartamento \n", + "3

raíces dv001 precioso departamento tipo k... Apartamento \n", + "4

casa sola, bonita de dos recámaras u... Casa \n", + "... ... ... \n", + "59995 oportunidad!! ideal para oficina o casa habita... Casa \n", + "59996 casa, sala comedor, patio de servicio, buenas ... Casa \n", + "59997 hermosa casa lista para habitarse ubicada en f... Casa \n", + "59998 hermosa casa con acabados de lujo en fracciona... Casa \n", + "59999

hermosa casa bien ubicada. cuenta con:

\\... Casa \n", + "\n", + " direccion ciudad \\\n", + "0 Bosque de Cedros Miguel_Hidalgo \n", + "1 NaN Mérida \n", + "2 Pueblo de los Reyes, Coyoacán, Mexico D.F. Coyoacán \n", + "3 NaN Acapulco_de_Juárez \n", + "4 CEDROS Tultitlán \n", + "... ... ... \n", + "59995 Oriente 172 # 265 Venustiano_Carranza \n", + "59996 colinas Ecatepec_de_Morelos \n", + "59997 s/calle Guadalupe \n", + "59998 Zen House l Querétaro \n", + "59999 Fuentes de San Agustín 5051 Zapopan \n", + "\n", + " provincia antiguedad habitaciones garages ... \\\n", + "0 Distrito_Federal 29.0 3.0 NaN ... \n", + "1 Yucatán NaN 1.0 1.0 ... \n", + "2 Distrito_Federal 0.0 2.0 1.0 ... \n", + "3 Guerrero 2.0 2.0 2.0 ... \n", + "4 Edo._de_México 10.0 2.0 1.0 ... \n", + "... ... ... ... ... ... \n", + "59995 Distrito_Federal 20.0 4.0 3.0 ... \n", + "59996 Edo._de_México 10.0 3.0 1.0 ... \n", + "59997 Nuevo_León 5.0 3.0 2.0 ... \n", + "59998 Querétaro 0.0 2.0 1.0 ... \n", + "59999 Jalisco 0.0 3.0 2.0 ... \n", + "\n", + " cantidad_ciudad_banos cantidad_ciudad_gimnasio \\\n", + "0 54 1213 \n", + "1 252 1677 \n", + "2 503 1249 \n", + "3 230 561 \n", + "4 153 231 \n", + "... ... ... \n", + "59995 30 393 \n", + "59996 172 551 \n", + "59997 180 459 \n", + "59998 1219 3032 \n", + "59999 828 2396 \n", + "\n", + " cantidad_ciudad_usosmultiples cantidad_provincia_antiguedad \\\n", + "0 1255 31 \n", + "1 1798 670 \n", + "2 1249 1999 \n", + "3 572 26 \n", + "4 232 1958 \n", + "... ... ... \n", + "59995 395 2287 \n", + "59996 551 1958 \n", + "59997 487 585 \n", + "59998 3011 1452 \n", + "59999 2315 1201 \n", + "\n", + " cantidad_provincia_habitaciones cantidad_provincia_garages \\\n", + "0 5828 1643 \n", + "1 47 123 \n", + "2 5263 4724 \n", + "3 234 235 \n", + "4 2219 2887 \n", + "... ... ... 
\n", + "59995 1273 1500 \n", + "59996 5458 2887 \n", + "59997 2136 1546 \n", + "59998 568 509 \n", + "59999 2660 2294 \n", + "\n", + " cantidad_provincia_banos cantidad_provincia_gimnasio \\\n", + "0 710 13668 \n", + "1 278 1854 \n", + "2 5881 13668 \n", + "3 251 604 \n", + "4 2894 9454 \n", + "... ... ... \n", + "59995 2516 13668 \n", + "59996 3466 9454 \n", + "59997 1212 3698 \n", + "59998 1733 4061 \n", + "59999 1835 4979 \n", + "\n", + " cantidad_provincia_usosmultiples cantidad_provincia_piscina \n", + "0 13749 14136 \n", + "1 1980 1575 \n", + "2 13749 14136 \n", + "3 622 310 \n", + "4 9435 9632 \n", + "... ... ... \n", + "59995 13749 14136 \n", + "59996 9435 9632 \n", + "59997 3869 3772 \n", + "59998 4015 3766 \n", + "59999 4865 4690 \n", + "\n", + "[60000 rows x 68 columns]" ] }, "metadata": {}, @@ -713,38 +1324,19 @@ } ], "source": [ - "df_train = pd.read_csv('./masfeatures_train.csv')\n", - "df_eval = pd.read_csv('./masfeatures_test.csv')\n", + "df_test = pd.read_csv('./data/test.csv')\n", + "df1, features1 = concatenar_categoricos(df_test, 'tipodepropiedad', 'provincia', 0, None, True)\n", + "df2, features2 = concatenar_categoricos(df_test, 'provincia', 'ciudad', 0, None, True)\n", "\n", + "features_borarr = features1 + features2\n", + "df_features_sin_ohe = pd.read_csv('./masfeatures_test_f.csv')\n", + "df_features_sin_ohe = df_features_sin_ohe.drop(features_borarr, axis=1)\n", "\n", - "# Correccion csv - strings que deberian ser floats\n", - "df_train['antiguedad'] = df_train['antiguedad'].apply(lambda antiguedad: None if antiguedad == 'otros' else antiguedad)\n", - "df_eval['antiguedad'] = df_eval['antiguedad'].apply(lambda antiguedad: None if antiguedad == 'otros' else antiguedad)\n", - "df_eval['antiguedad'] = df_eval['antiguedad'].astype(float)\n", - "df_train['antiguedad'] = df_train['antiguedad'].astype(float)\n", - "\n", - "df_train['habitaciones'] = df_train['habitaciones'].apply(lambda x: None if x == 'otros' else x)\n", - 
"df_eval['habitaciones'] = df_eval['habitaciones'].apply(lambda x: None if x == 'otros' else x)\n", - "df_eval['habitaciones'] = df_eval['habitaciones'].astype(float)\n", - "df_train['habitaciones'] = df_train['habitaciones'].astype(float)\n", + "display(len(df_features_sin_ohe.columns))\n", + "display(df_features_sin_ohe.columns)\n", + "display(df_features_sin_ohe)\n", "\n", - "df_train['garages'] = df_train['garages'].apply(lambda x: None if x == 'otros' else x)\n", - "df_eval['garages'] = df_eval['garages'].apply(lambda x: None if x == 'otros' else x)\n", - "df_eval['garages'] = df_eval['garages'].astype(float)\n", - "df_train['garages'] = df_train['garages'].astype(float)\n", - "\n", - "df_train['banos'] = df_train['banos'].apply(lambda x: None if x == 'otros' else x)\n", - "df_eval['banos'] = df_eval['banos'].apply(lambda x: None if x == 'otros' else x)\n", - "df_eval['banos'] = df_eval['banos'].astype(float)\n", - "df_train['banos'] = df_train['banos'].astype(float)\n", - "\n", - "# Las columnas de tipo object\n", - "x = df_train.columns.to_series().groupby(df_train.dtypes).groups\n", - "display(\"Las columnas de tipo Object:\")\n", - "display(list(x[list(x.keys())[len(list(x.keys()))-1]]))\n", - "\n", - "df_eval.to_csv('masfeatures_train_f.csv', index=False)\n", - "df_train.to_csv('masfeatures_test_f.csv', index=False)" + "df_features_sin_ohe.to_csv('masfeatures_test_f_no_ohe.csv', index=False)" ] } ], diff --git a/html/features_complementarias.html b/html/features_complementarias.html index a958235..1d3a01e 100644 --- a/html/features_complementarias.html +++ b/html/features_complementarias.html @@ -13093,7 +13093,7 @@
-
In [2]:
+
In [3]:
def concatenar_categoricos(df, columna1, columna2, N=0, df_aux=None, devolver_cols=False):
@@ -13122,7 +13122,7 @@
 
-
In [3]:
+
In [ ]:
def cantidad_propiedad_misma_cantidad(df, agrupar_col, misma_cantidad_col):
@@ -13151,22 +13151,33 @@
 
-
In [4]:
+
In [ ]:
-
def cantidad_atributo(df, col):
+
def cantidad_atributo(df, col, df_test):
     '''Simil a value_counts. Dada una columna, agrega al df, cuantas propiedades existen con esa misma propiedad'''
+    '''Se rellena en df_test, con los counts que se hicieron en df'''
     df_aux = df.copy()
     df_aux = df_aux.groupby([col]).agg({'id': 'count'})
     df_aux = df_aux.rename(columns={'id':'cantidad'})
     df_aux = df_aux.reset_index()
+    
+    prop_cantidad = {}
 
     def get_cantidad(col1):
         cantidad = df_aux[df_aux[col] == col1]['cantidad']
-        return cantidad.values[0] if len(cantidad.values > 0) else 0
+        if col1 in prop_cantidad:
+            return prop_cantidad[col1]
+        cantidad_retornar = cantidad.values[0] if len(cantidad.values > 0) else 0
+        prop_cantidad[col1] = cantidad_retornar
+        return cantidad_retornar
     
     df['cantidad_propiedades_en_' + col] = df.apply(lambda x: get_cantidad(x[col]), axis=1)
-    return df
+    
+    df_test['cantidad_propiedades_en_' + col] = df_test.apply(
+                            lambda x: prop_cantidad[x[col]] if x[col] in prop_cantidad else 0, axis=1)
+    
+    return df, df_test
 
@@ -13176,7 +13187,7 @@
-
In [5]:
+
In [ ]:
def ranking_atributo(df, col):
@@ -13217,11 +13228,12 @@
 
-
In [6]:
+
In [ ]:
def aplicar_features(df_recibido):
     df = df_recibido.copy()    
+    # -- 
     df = concatenar_categoricos(df, 'tipodepropiedad', 'provincia')
     df = concatenar_categoricos(df, 'provincia', 'ciudad')
     # -- 
@@ -13326,7 +13338,7 @@
 
-
In [7]:
+
In [ ]:
df = df_train
@@ -13350,170 +13362,85 @@
 
-
-
- - -
- -
- - - - -
-
Index(['id', 'titulo', 'descripcion', 'tipodepropiedad', 'direccion', 'ciudad',
-       'provincia', 'antiguedad', 'habitaciones', 'garages', 'banos',
-       'metroscubiertos', 'metrostotales', 'idzona', 'lat', 'lng', 'fecha',
-       'gimnasio', 'usosmultiples', 'piscina', 'escuelascercanas',
-       'centroscomercialescercanos', 'precio'],
-      dtype='object')
-
- -
- -
- -
- - - - -
-
(240000, 23)
-
- -
- -
- -
- - - - -
-
'Check-1.0'
-
- -
- -
- -
- - - - -
-
'Check-1.1'
-
- -
- -
- -
- - - - -
-
'Check-1.2'
-
- -
- -
- -
- - - - -
-
'Check-1.3'
-
-
+
+
+
In [ ]:
+
+
+
df_train = pd.read_csv('./masfeatures_train.csv')
+df_eval = pd.read_csv('./masfeatures_test.csv')
 
-
- -
- - - - -
-
'Check-1.4'
-
-
+# Correccion csv - strings que deberian ser floats +df_train['antiguedad'] = df_train['antiguedad'].apply(lambda antiguedad: None if antiguedad == 'otros' else antiguedad) +df_eval['antiguedad'] = df_eval['antiguedad'].apply(lambda antiguedad: None if antiguedad == 'otros' else antiguedad) +df_eval['antiguedad'] = df_eval['antiguedad'].astype(float) +df_train['antiguedad'] = df_train['antiguedad'].astype(float) -
+df_train['habitaciones'] = df_train['habitaciones'].apply(lambda x: None if x == 'otros' else x) +df_eval['habitaciones'] = df_eval['habitaciones'].apply(lambda x: None if x == 'otros' else x) +df_eval['habitaciones'] = df_eval['habitaciones'].astype(float) +df_train['habitaciones'] = df_train['habitaciones'].astype(float) -
+df_train['garages'] = df_train['garages'].apply(lambda x: None if x == 'otros' else x) +df_eval['garages'] = df_eval['garages'].apply(lambda x: None if x == 'otros' else x) +df_eval['garages'] = df_eval['garages'].astype(float) +df_train['garages'] = df_train['garages'].astype(float) +df_train['banos'] = df_train['banos'].apply(lambda x: None if x == 'otros' else x) +df_eval['banos'] = df_eval['banos'].apply(lambda x: None if x == 'otros' else x) +df_eval['banos'] = df_eval['banos'].astype(float) +df_train['banos'] = df_train['banos'].astype(float) +# Las columnas de tipo object +x = df_train.columns.to_series().groupby(df_train.dtypes).groups +display("Las columnas de tipo Object:") +display(list(x[list(x.keys())[len(list(x.keys()))-1]])) +df_eval.to_csv('masfeatures_train_f.csv', index=False) +df_train.to_csv('masfeatures_test_f.csv', index=False) +
-
-
'Check-1.5'
+
-
-
- -
- - - - -
-
'Check-1.6'
- +
+
+
+

Las mismas features anteriores, sin ohe - train

- -
- -
- - - - -
-
'Check-1.7'
-
+
+
+
In [15]:
+
+
+
df_train = pd.read_csv('./data/train.csv')
+df1, features1 = concatenar_categoricos(df_train, 'tipodepropiedad', 'provincia', 0, None, True)
+df2, features2 = concatenar_categoricos(df_train, 'provincia', 'ciudad', 0, None, True)
 
-
- -
- +features_borarr = features1 + features2 +df_features_sin_ohe = pd.read_csv('./masfeatures_train_f.csv') +df_features_sin_ohe = df_features_sin_ohe.drop(features_borarr, axis=1) +display(len(df_features_sin_ohe.columns)) +display(df_features_sin_ohe.columns) +display(df_features_sin_ohe) +df_features_sin_ohe.to_csv('masfeatures_train_f_no_ohe.csv', index=False) +
-
-
'Check-1.8'
+
-
-
- -
- - - - -
-
'Check-1.9'
-
+
+
-
@@ -13523,7 +13450,7 @@
-
'Check-1.10'
+
69
@@ -13536,7 +13463,39 @@
-
'Check-2.0'
+
Index(['id', 'titulo', 'descripcion', 'tipodepropiedad', 'direccion', 'ciudad',
+       'provincia', 'antiguedad', 'habitaciones', 'garages', 'banos',
+       'metroscubiertos', 'metrostotales', 'idzona', 'lat', 'lng', 'fecha',
+       'gimnasio', 'usosmultiples', 'piscina', 'escuelascercanas',
+       'centroscomercialescercanos', 'precio', 'tipodepropiedad_provincia',
+       'provincia_ciudad', 'cantidad_propiedades_en_provincia',
+       'cantidad_propiedades_en_ciudad', 'cantidad_propiedades_en_antiguedad',
+       'cantidad_propiedades_en_habitaciones',
+       'cantidad_propiedades_en_garages', 'cantidad_propiedades_en_banos',
+       'cantidad_propiedades_en_gimnasio',
+       'cantidad_propiedades_en_usosmultiples',
+       'cantidad_propiedades_en_escuelascercanas',
+       'cantidad_propiedades_en_centroscomercialescercanos',
+       'cantidad_propiedades_en_piscina', 'ranking_en_provincia',
+       'ranking_en_tipodepropiedad', 'ranking_en_ciudad',
+       'ranking_en_antiguedad', 'ranking_en_habitaciones',
+       'ranking_en_garages', 'ranking_en_banos', 'ranking_en_gimnasio',
+       'ranking_en_usosmultiples', 'ranking_en_escuelascercanas',
+       'ranking_en_centroscomercialescercanos',
+       'cantidad_tipodepropiedad_provincia', 'cantidad_tipodepropiedad_ciudad',
+       'cantidad_tipodepropiedad_antiguedad',
+       'cantidad_tipodepropiedad_habitaciones',
+       'cantidad_tipodepropiedad_garages', 'cantidad_tipodepropiedad_banos',
+       'cantidad_tipodepropiedad_gimnasio',
+       'cantidad_tipodepropiedad_usosmultiples',
+       'cantidad_tipodepropiedad_piscina', 'cantidad_ciudad_antiguedad',
+       'cantidad_ciudad_habitaciones', 'cantidad_ciudad_garages',
+       'cantidad_ciudad_banos', 'cantidad_ciudad_gimnasio',
+       'cantidad_ciudad_usosmultiples', 'cantidad_provincia_antiguedad',
+       'cantidad_provincia_habitaciones', 'cantidad_provincia_garages',
+       'cantidad_provincia_banos', 'cantidad_provincia_gimnasio',
+       'cantidad_provincia_usosmultiples', 'cantidad_provincia_piscina'],
+      dtype='object')
@@ -13547,90 +13506,359 @@ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
idtitulodescripciontipodepropiedaddireccionciudadprovinciaantiguedadhabitacionesgarages...cantidad_ciudad_banoscantidad_ciudad_gimnasiocantidad_ciudad_usosmultiplescantidad_provincia_antiguedadcantidad_provincia_habitacionescantidad_provincia_garagescantidad_provincia_banoscantidad_provincia_gimnasiocantidad_provincia_usosmultiplescantidad_provincia_piscina
0254099depto. tipo a-402depto. interior de 80.15m2, consta de sala com...ApartamentoAvenida Division del Norte 2005Benito_JuárezDistrito_FederalNaN2.01.0...612710391103927041210621921423514543805474656163
153461condominio horizontal en venta<p>entre sonora y guerrero, atr&aacute;s del h...Casa_en_condominioAV. MEXICOLa_Magdalena_ContrerasDistrito_Federal10.03.02.0...607184218128904234791858723514543805474656163
2247984casa en venta urbi 3 recamaras tonaladescripcion \nla mejor ubicacion residencial e...CasaUrbi TonalaTonaláJalisco5.03.02.0...25778377842561049291807527198941943118756
3209067casa sola en toluca zinacantepec con credito i...casa en privada con caseta de vigilancia casas...CasaIGNACIO MANUEL ALTAMIRANO 128ZinacantepecEdo._de_México1.02.01.0...46149148144994061168912013385413854739277
4185997paseos del solbonito departamento en excelentes condiciones ...ApartamentoPASEOS DEL SOLZapopanJalisco10.02.01.0...1704963493062749418851234434198941943118756
..................................................................
239995119879bonita casas de 2 recamaras a 10 minutos del c...vendo casa en bosques de ica residencial a 10 ...CasaBOSQUESZinacantepecEdo._de_México0.02.02.0...46149148564994061432212013385413854739277
239996259178casa en condominio a 10 min. del centro de tolucacasa con un jardin amplio, un cuarto de servic...CasaFiliberto Navas 325TolucaEdo._de_México0.03.03.0...2271932194756492220059769335385413854739277
239997131932nicolas san juandepartamento con excelente ubicación, muy cerc...ApartamentoNicolas San JuanBenito_JuárezDistrito_Federal20.02.01.0...612710391103929323210621921423514543805474656163
239998146867casa sola. javier rojo gomez.casa sola, dividida en cuatro departamentos de...CasaJavier Rojo Gomez 120IztapalapaDistrito_Federal20.04.00.0...99503028932350658472292644105474656163
239999121958departamento en bosques de las lomas / av. st...id:19816, muy bonito e iluminado departamento,...ApartamentoAVE. STIMCuajimalpa_de_MorelosDistrito_Federal1.03.02.0...1244230924902411234791858723514543805474656163
+

240000 rows × 69 columns

- -
- -
- - - - -
-
'Check-2.2'
-
- -
- - - - -
-
'Check-2.3'
-
-
- -
- - - - -
-
'Check-2.4'
- +
+
+
+

Las mismas features anteriores, sin ohe - train

- -
- -
- - - - -
-
'Check-2.5'
-
+
+
+
In [16]:
+
+
+
df_test = pd.read_csv('./data/test.csv')
+df1, features1 = concatenar_categoricos(df_test, 'tipodepropiedad', 'provincia', 0, None, True)
+df2, features2 = concatenar_categoricos(df_test, 'provincia', 'ciudad', 0, None, True)
 
-
- -
- +features_borarr = features1 + features2 +df_features_sin_ohe = pd.read_csv('./masfeatures_test_f.csv') +df_features_sin_ohe = df_features_sin_ohe.drop(features_borarr, axis=1) +display(len(df_features_sin_ohe.columns)) +display(df_features_sin_ohe.columns) +display(df_features_sin_ohe) +df_features_sin_ohe.to_csv('masfeatures_test_f_no_ohe.csv', index=False) +
-
-
'Check-2.6'
+
-
-
- -
- - - - -
-
'Check-2.7'
-
+
+
-
@@ -13640,7 +13868,7 @@
-
'Check-2.8'
+
68
@@ -13653,7 +13881,39 @@
-
'Check-2.9'
+
Index(['id', 'titulo', 'descripcion', 'tipodepropiedad', 'direccion', 'ciudad',
+       'provincia', 'antiguedad', 'habitaciones', 'garages', 'banos',
+       'metroscubiertos', 'metrostotales', 'idzona', 'lat', 'lng', 'fecha',
+       'gimnasio', 'usosmultiples', 'piscina', 'escuelascercanas',
+       'centroscomercialescercanos', 'tipodepropiedad_provincia',
+       'provincia_ciudad', 'cantidad_propiedades_en_provincia',
+       'cantidad_propiedades_en_ciudad', 'cantidad_propiedades_en_antiguedad',
+       'cantidad_propiedades_en_habitaciones',
+       'cantidad_propiedades_en_garages', 'cantidad_propiedades_en_banos',
+       'cantidad_propiedades_en_gimnasio',
+       'cantidad_propiedades_en_usosmultiples',
+       'cantidad_propiedades_en_escuelascercanas',
+       'cantidad_propiedades_en_centroscomercialescercanos',
+       'cantidad_propiedades_en_piscina', 'ranking_en_provincia',
+       'ranking_en_tipodepropiedad', 'ranking_en_ciudad',
+       'ranking_en_antiguedad', 'ranking_en_habitaciones',
+       'ranking_en_garages', 'ranking_en_banos', 'ranking_en_gimnasio',
+       'ranking_en_usosmultiples', 'ranking_en_escuelascercanas',
+       'ranking_en_centroscomercialescercanos',
+       'cantidad_tipodepropiedad_provincia', 'cantidad_tipodepropiedad_ciudad',
+       'cantidad_tipodepropiedad_antiguedad',
+       'cantidad_tipodepropiedad_habitaciones',
+       'cantidad_tipodepropiedad_garages', 'cantidad_tipodepropiedad_banos',
+       'cantidad_tipodepropiedad_gimnasio',
+       'cantidad_tipodepropiedad_usosmultiples',
+       'cantidad_tipodepropiedad_piscina', 'cantidad_ciudad_antiguedad',
+       'cantidad_ciudad_habitaciones', 'cantidad_ciudad_garages',
+       'cantidad_ciudad_banos', 'cantidad_ciudad_gimnasio',
+       'cantidad_ciudad_usosmultiples', 'cantidad_provincia_antiguedad',
+       'cantidad_provincia_habitaciones', 'cantidad_provincia_garages',
+       'cantidad_provincia_banos', 'cantidad_provincia_gimnasio',
+       'cantidad_provincia_usosmultiples', 'cantidad_provincia_piscina'],
+      dtype='object')
@@ -13664,393 +13924,317 @@ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
idtitulodescripciontipodepropiedaddireccionciudadprovinciaantiguedadhabitacionesgarages...cantidad_ciudad_banoscantidad_ciudad_gimnasiocantidad_ciudad_usosmultiplescantidad_provincia_antiguedadcantidad_provincia_habitacionescantidad_provincia_garagescantidad_provincia_banoscantidad_provincia_gimnasiocantidad_provincia_usosmultiplescantidad_provincia_piscina
04941casa en venta en miguel hidalgo, distrito federal<p>excelente casa estilo moderno.</p>CasaBosque de CedrosMiguel_HidalgoDistrito_Federal29.03.0NaN...54121312553158281643710136681374914136
151775departamentos en venta en montebello<p>departamento una recamara:\n</p><p>departam...ApartamentoNaNMéridaYucatánNaN1.01.0...2521677179867047123278185419801575
2115253departamento nuevo delegación coyoacán de 87 m...departamento nuevo de 87.06 m2, 1 cajón de est...ApartamentoPueblo de los Reyes, Coyoacán, Mexico D.F.CoyoacánDistrito_Federal0.02.01.0...503124912491999526347245881136681374914136
3299321departamento en venta en acapulco<p> raíces dv001 precioso departamento tipo k...ApartamentoNaNAcapulco_de_JuárezGuerrero2.02.02.0...23056157226234235251604622310
4173570bonita casa sola equipada de dos niveles en lo...<p>casa sola, bonita de dos rec&aacute;maras u...CasaCEDROSTultitlánEdo._de_México10.02.01.0...1532312321958221928872894945494359632
..................................................................
5999575094oportunidad!! se vende amplia casa en col. moc...oportunidad!! ideal para oficina o casa habita...CasaOriente 172 # 265Venustiano_CarranzaDistrito_Federal20.04.03.0...303933952287127315002516136681374914136
59996171847colinas de ecatepeccasa, sala comedor, patio de servicio, buenas ...CasacolinasEcatepec_de_MorelosEdo._de_México10.03.01.0...1725515511958545828873466945494359632
59997138313estrene hermosa casa en sierra morenahermosa casa lista para habitarse ubicada en f...Casas/calleGuadalupeNuevo_León5.03.02.0...180459487585213615461212369838693772
59998271268zen house i venta de linda casa con acabados ...hermosa casa con acabados de lujo en fracciona...CasaZen House lQuerétaroQuerétaro0.02.01.0...12193032301114525685091733406140153766
5999972612casa en chapalita las fuentes/ cercano al club...<p>hermosa casa bien ubicada. cuenta con:</p>\...CasaFuentes de San Agustín 5051ZapopanJalisco0.03.02.0...828239623151201266022941835497948654690
+

60000 rows × 68 columns

- -
- -
- - - - -
-
'Check-3.0'
-
- -
- -
- -
- - - - -
-
'Check-3.1'
-
- -
- -
- -
- - - - -
-
'Check-3.2'
-
- -
- -
- -
- - - - -
-
'Check-3.3'
-
- -
- -
- -
- - - - -
-
'Check-3.4'
-
- -
- -
- -
- - - - -
-
'Check-3.5'
-
- -
- -
- -
- - - - -
-
'Check-3.6'
-
- -
- -
- -
- - - - -
-
'Check-3.7'
-
- -
- -
- -
- - - - -
-
'Check-3.8'
-
- -
- -
- -
- - - - -
-
'Check-4.0'
-
- -
- -
- -
- - - - -
-
'Check-4.1'
-
- -
- -
- -
- - - - -
-
'Check-4.2'
-
- -
- -
- -
- - - - -
-
'Check-4.3'
-
- -
- -
- -
- - - - -
-
'Check-4.4'
-
- -
- -
- -
- - - - -
-
'Check-4.5'
-
- -
- -
- -
- - - - -
-
'Check-5.6'
-
- -
- -
- -
- - - - -
-
'Check-5.7'
-
- -
- -
- -
- - - - -
-
'Check-5.8'
-
- -
- -
- -
- - - - -
-
'Check-5.9'
-
- -
- -
- -
- - - - -
-
'Check-5.10'
-
- -
- -
- -
- - - - -
-
'Check-5.11'
-
- -
- -
- -
- - - - -
-
'Check-5.12'
-
- -
- -
- -
- - - - -
-
'Check-6.0'
-
- -
- -
-
- -
-
-
-
In [3]:
-
-
-
df_train = pd.read_csv('./masfeatures_train.csv')
-df_eval = pd.read_csv('./masfeatures_test.csv')
-
-
-# Correccion csv - strings que deberian ser floats
-df_train['antiguedad'] = df_train['antiguedad'].apply(lambda antiguedad: None if antiguedad == 'otros' else antiguedad)
-df_eval['antiguedad'] = df_eval['antiguedad'].apply(lambda antiguedad: None if antiguedad == 'otros' else antiguedad)
-df_eval['antiguedad'] = df_eval['antiguedad'].astype(float)
-df_train['antiguedad'] = df_train['antiguedad'].astype(float)
-
-df_train['habitaciones'] = df_train['habitaciones'].apply(lambda x: None if x == 'otros' else x)
-df_eval['habitaciones'] = df_eval['habitaciones'].apply(lambda x: None if x == 'otros' else x)
-df_eval['habitaciones'] = df_eval['habitaciones'].astype(float)
-df_train['habitaciones'] = df_train['habitaciones'].astype(float)
-
-df_train['garages'] = df_train['garages'].apply(lambda x: None if x == 'otros' else x)
-df_eval['garages'] = df_eval['garages'].apply(lambda x: None if x == 'otros' else x)
-df_eval['garages'] = df_eval['garages'].astype(float)
-df_train['garages'] = df_train['garages'].astype(float)
-
-df_train['banos'] = df_train['banos'].apply(lambda x: None if x == 'otros' else x)
-df_eval['banos'] = df_eval['banos'].apply(lambda x: None if x == 'otros' else x)
-df_eval['banos'] = df_eval['banos'].astype(float)
-df_train['banos'] = df_train['banos'].astype(float)
-
-# Las columnas de tipo object
-x = df_train.columns.to_series().groupby(df_train.dtypes).groups
-display("Las columnas de tipo Object:")
-display(list(x[list(x.keys())[len(list(x.keys()))-1]]))
-
-df_eval.to_csv('masfeatures_train_f.csv', index=False)
-df_train.to_csv('masfeatures_test_f.csv', index=False)
-
- -
-
-
- -
-
- - -
- -
- - - - -
-
'Las columnas de tipo Object:'
-
- -
- -
- -
- - - - -
-
['titulo',
- 'descripcion',
- 'tipodepropiedad',
- 'direccion',
- 'ciudad',
- 'provincia',
- 'fecha',
- 'tipodepropiedad_provincia',
- 'provincia_ciudad']