Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions servel_scraper/data_extractor/csv_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from servel_scraper.data_extractor.person import Person

OUT_CSV_DELIMITER = ','
OUT_CSV_DELIMITER = ';'
OUT_CSV_QUOTE_CHAR = '"'


Expand All @@ -16,7 +16,7 @@ def write_rows(self, people: List[Person]):
self.write_row(p)

def write_row(self, p: Person):
self._csv_writer.writerow([p.name, p.rut, str(p.gender), p.address])
self._csv_writer.writerow([p.name, p.rut, str(p.gender), p.address, p.comuna])


class CSVExporter(object):
Expand Down
1 change: 1 addition & 0 deletions servel_scraper/data_extractor/person.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,4 @@ class Person:
rut: str
address: str
gender: Gender
comuna: str
4 changes: 3 additions & 1 deletion servel_scraper/data_extractor/servel_pdf_stream_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
RUT_FIELD_INDEX = 1
GENDER_FIELD_INDEX = 2
ADDRESS_FIELD_INDEX = 3
COMUNA_FIELD_INDEX = 4


class ServelPDFStreamPage(object):
Expand Down Expand Up @@ -109,7 +110,8 @@ def _convert_row_of_strings_to_person(cls, row: List[str]) -> Person:
name=row[NAME_FIELD_INDEX],
rut=row[RUT_FIELD_INDEX],
gender=cls._str_gender_to_gender(row[GENDER_FIELD_INDEX]),
address=row[ADDRESS_FIELD_INDEX]
address=row[ADDRESS_FIELD_INDEX],
comuna=row[COMUNA_FIELD_INDEX]
)

@staticmethod
Expand Down
2 changes: 2 additions & 0 deletions servel_scraper/data_extractor/test_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,10 @@ def test_extract_with_valid_file__must_be_extract_data(self):
self.assertEqual(persons[0].gender, Gender.MALE)
self.assertEqual(persons[0].rut, '13.392.711-5')
self.assertEqual(persons[0].address, 'ISLA REY JORGE SIN N')
self.assertEqual(persons[0].comuna, 'ANTARTICA')
# Ultima persona
self.assertEqual(persons[EXPECTED_SIZE - 1].name, 'ZUÑIGA MIRANDA MARIA ELENA')
self.assertEqual(persons[EXPECTED_SIZE - 1].gender, Gender.FEMALE)
self.assertEqual(persons[EXPECTED_SIZE - 1].rut, '11.660.676-3')
self.assertEqual(persons[EXPECTED_SIZE - 1].address, 'BA E FREI')
self.assertEqual(persons[EXPECTED_SIZE - 1].comuna, 'ANTARTICA')
10 changes: 5 additions & 5 deletions servel_scraper/downloader/servel_file_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from servel_scraper.downloader.file_downloader import FileDownloader
from tqdm import tqdm

CDN_BASE_URL = 'http://cdn.servel.cl/padron'
CDN_BASE_URL = 'http://www.paltabi.cl/secure'

_DEFAULT_CUT_COLUMNS_CSV: Dict[CUTField, str] = {
CUTField.NOMBRE_REGION: 'Nombre Región',
Expand All @@ -28,8 +28,8 @@ class ServerFilesUrlRepo(object):
def get_servel_files_urls(cls, csv_path: str) -> List[str]:
"""
i.e: [
"http://cdn.servel.cl/padron/A01107.pdf",
"http://cdn.servel.cl/padron/01101.pdf",
"http://www.paltabi.cl/secure/A01107.pdf",
"http://www.paltabi.cl/secure/01101.pdf",
...
]
"""
Expand All @@ -48,7 +48,7 @@ def _get_cuts(csv_path: str) -> List[CUT]:
@staticmethod
def _build_url_from_cut(cut: CUT) -> Url:
"""
i.e: "http://cdn.servel.cl/padron/A01107.pdf"
i.e: "http://www.paltabi.cl/secure/A01107.pdf"
"""
return f"{CDN_BASE_URL}/A{cut.codigo_comuna}.pdf"

Expand All @@ -68,7 +68,7 @@ def download_servel_files(out_path: str, csv_path: str = DEFAULT_CUT_CSV_PATH) -
result = DownloadResult(failed={}, success=[])
pbar = tqdm(urls)
for url in pbar:
pbar.set_description(f"Downloading: {url}")
pbar.set_description(f"Descargando: {url}")
try:
file_downloader.download_file(out_path, url)
except (BaseException, Exception) as e:
Expand Down