Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
e396c64
initial commit of recent-collabs builder
sbillinge Jan 3, 2020
a6b9851
now extracts people it finds in author list, but only if in people co…
sbillinge Jan 3, 2020
dd4d779
WIP working and returns set of folks in people coll
sbillinge Jan 3, 2020
54e3ee1
proper name parsing in recentcollabs builder
sbillinge Jan 5, 2020
49a105b
tweaking error handling in recent_collabs
sbillinge Jan 5, 2020
8799307
adding dateutil to requirements
sbillinge Jan 10, 2020
95eff79
ENH: add needed_dbs
dragonyanglong Feb 5, 2020
2e164af
MAINT: replace sbillinge with people argument
dragonyanglong Feb 5, 2020
68145d1
catch tbd months
sbillinge Feb 15, 2020
484abe5
more friendly fail when no person is specified
sbillinge Feb 16, 2020
5db5ea0
now extracts people it finds in author list, but only if in people co…
sbillinge Jan 3, 2020
a6bc4ba
people seems to be enforced as list in p3.8]
sbillinge Feb 17, 2020
71e55dc
test file
Feb 19, 2020
f2afba3
added coa_template.xlsx
Feb 19, 2020
b6fbe30
- added script coabuilder.py filling in excel template
Feb 20, 2020
900f02f
- removed the duplicate entries
Feb 21, 2020
4ca5487
added global variable NUM_MONTHS
Feb 21, 2020
abeadf0
requirements should have python-dateutil not just dateutil
sbillinge Mar 4, 2020
0b5677d
remove missing review-man test for test_builders. This should be in a…
sbillinge Mar 4, 2020
232b17a
changing tests so that recent collabs will run with scopatz as person
sbillinge Mar 4, 2020
c208195
added function filter for advisors and positions, status of last comm…
SaniHarouna-Mayer Mar 4, 2020
d29c162
add advisor to schema @ education and employment
SaniHarouna-Mayer Mar 4, 2020
73f3373
deleted duplicated function filter grants @tools.py
SaniHarouna-Mayer Mar 5, 2020
4f25ea5
in EXEMPLARS - add an advisor entry for one entry in employment and o…
SaniHarouna-Mayer Mar 5, 2020
1fe7cd2
add description to advisor @employment & @education schema. descript…
SaniHarouna-Mayer Mar 5, 2020
c1b527b
update schema.py -> education and employment -> advisor -> descriptio…
SaniHarouna-Mayer Mar 16, 2020
100c5ea
update schema.py -> change key name advisor to mentor
SaniHarouna-Mayer Mar 16, 2020
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ env:

matrix:
include:
- python: 3.6
- python: 3.8

install:
# Install conda
Expand Down
Empty file added news/test.rst
Empty file.
2 changes: 2 additions & 0 deletions regolith/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from regolith.builders.resumebuilder import ResumeBuilder
from regolith.builders.cpbuilder import CPBuilder
from regolith.builders.figurebuilder import FigureBuilder
from regolith.builders.coabuilder import RecentCollaboratorsBuilder


BUILDERS = {
Expand All @@ -28,6 +29,7 @@
"preslist": PresListBuilder,
"reimb": ReimbursementBuilder,
"figure": FigureBuilder,
"recent-collabs": RecentCollaboratorsBuilder,
}


Expand Down
187 changes: 187 additions & 0 deletions regolith/builders/coabuilder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
"""Builder for the recent-collaborators (COA) spreadsheet."""

import datetime as dt
import os
import sys
import openpyxl

from regolith.builders.basebuilder import BuilderBase
from regolith.dates import month_to_int
from regolith.sorters import doc_date_key, ene_date_key, position_key
from regolith.tools import all_docs_from_collection, filter_publications, \
month_and_year, fuzzy_retrieval, is_since
from copy import copy
from dateutil.relativedelta import relativedelta
from operator import itemgetter


# Look-back window, in months: RecentCollaboratorsBuilder.excel subtracts this
# from today's date to decide which publications count as "recent".
NUM_MONTHS = 48

def mdy_date(month, day, year, **kwargs):
    """Return a ``datetime.date`` built from month, day and year.

    Parameters
    ----------
    month : int or str
        Month number, or a month string that ``month_to_int`` can convert.
    day : int
        Day of the month.
    year : int
        Calendar year.
    **kwargs
        Accepted and ignored, so a whole document can be splatted in.

    Returns
    -------
    datetime.date
    """
    month_number = month_to_int(month) if isinstance(month, str) else month
    return dt.date(year, month_number, day)


def mdy(month, day, year, **kwargs):
    """Format a date as a ``MM/DD/YY`` string.

    The month is converted with ``month_to_int``; month and day are
    zero-padded to two characters and only the last two characters of the
    year are kept. Extra keyword arguments are accepted and ignored.
    """
    month_part = str(month_to_int(month)).zfill(2)
    day_part = str(day).zfill(2)
    year_part = str(year)[-2:]
    return "/".join((month_part, day_part, year_part))


class RecentCollaboratorsBuilder(BuilderBase):
    """Build a recent-collaborators spreadsheet from database entries.

    For the person named by ``rc.people`` the builder gathers the authors of
    that person's publications from the last ``NUM_MONTHS`` months, looks each
    author up in the people and contacts collections, resolves an institution
    for them, and writes the de-duplicated ``"Last, First"`` / institution
    rows into a copy of ``coa_template.xlsx`` that is saved as
    ``coa_table.xlsx`` in the build directory.
    """

    btype = "recent-collabs"
    needed_dbs = ['citations', 'people', 'contacts', 'institutions']

    def __init__(self, rc):
        """Set the template path and register the ``excel`` command."""
        super().__init__(rc)
        self.template = os.path.join(
            os.path.dirname(os.path.dirname(__file__)), "templates",
            "coa_template.xlsx"
        )
        self.cmds = ["excel"]

    def construct_global_ctx(self):
        """Load the collections this builder needs into the global context."""
        super().construct_global_ctx()
        gtx = self.gtx
        rc = self.rc

        gtx["people"] = sorted(
            all_docs_from_collection(rc.client, "people"),
            key=position_key,
            reverse=True,
        )
        gtx["contacts"] = sorted(
            all_docs_from_collection(rc.client, "contacts"),
            key=position_key,
            reverse=True,
        )
        gtx["institutions"] = all_docs_from_collection(rc.client,
                                                       "institutions")
        gtx["citations"] = all_docs_from_collection(rc.client, "citations")
        gtx["all_docs_from_collection"] = all_docs_from_collection

    def excel(self):
        """Write ``coa_table.xlsx`` for the first person in ``rc.people``.

        Exits with a friendly message when no person was given or the given
        name cannot be matched in the people collection.
        """
        rc = self.rc
        since_date = dt.date.today() - relativedelta(months=NUM_MONTHS)
        if isinstance(rc.people, str):
            rc.people = [rc.people]
        if not rc.people:
            sys.exit("please rerun specifying --people PERSON")
        # Kept under its own name: the collaborator lookups below must never
        # overwrite the person the report is being built for.
        target = fuzzy_retrieval(
            all_docs_from_collection(rc.client, "people"),
            ['aka', 'name', '_id'], rc.people[0],
            case_sensitive=False)
        if not target:
            sys.exit("please rerun specifying --people PERSON")
        for p in self.gtx["people"]:
            if p["_id"] != target["_id"]:
                continue
            my_names = frozenset(p.get("aka", []) + [p["name"]])
            pubs = filter_publications(self.gtx["citations"], my_names,
                                       reverse=True, bold=False)
            my_collabs = self._recent_coauthors(pubs, since_date)
            rows = set()  # a set removes duplicate (name, institution) pairs
            for collab in my_collabs:
                found = self._lookup_collaborator(collab)
                if found is None:
                    continue
                person, institution = found
                rows.add((self._last_first(person["name"]), institution))
            self._write_workbook(sorted(rows, key=itemgetter(0)))

    @staticmethod
    def _recent_coauthors(pubs, since_date):
        """Return every author listed on the publications since since_date."""
        authors = []
        for pub in pubs:
            if not is_since(pub.get("year"), since_date.year,
                            pub.get("month", 1), since_date.month):
                continue
            month = pub.get("month")
            if not month:
                print("WARNING: {} is missing month".format(pub["_id"]))
            # casefold the stored value (not the literal) so "TBD" matches too
            if isinstance(month, str) and month.casefold() == "tbd":
                print("WARNING: month in {} is tbd".format(pub["_id"]))
            authors.extend(pub.get('author', []))
        return authors

    def _lookup_collaborator(self, collab):
        """Return ``(person_doc, institution)`` for a name, or None if unknown.

        People are searched first and take their institution from the first
        employment entry; contacts are the fallback and use their
        ``institution`` field. When the institution is not in the institutions
        collection the raw value is returned and a warning is printed.
        """
        client = self.rc.client
        person = fuzzy_retrieval(
            all_docs_from_collection(client, "people"),
            ["name", "aka", "_id"], collab)
        if person:
            # tolerate a missing or empty employment list
            employment = person.get("employment") or [
                {"organization": "missing"}]
            inst_key = employment[0].get("organization", "missing")
        else:
            person = fuzzy_retrieval(
                all_docs_from_collection(client, "contacts"),
                ["name", "aka", "_id"], collab)
            if not person:
                print("WARNING: {} not found in contacts. Check aka".format(
                    collab))
                return None
            inst_key = person.get("institution", "missing")
        inst = fuzzy_retrieval(
            all_docs_from_collection(client, "institutions"),
            ["name", "aka", "_id"], inst_key)
        if inst:
            return person, inst["name"]
        print("WARNING: {} missing from institutions".format(inst_key))
        return person, inst_key

    @staticmethod
    def _last_first(name):
        """Reformat ``"First Middle Last"`` as ``"Last, First Middle"``.

        Names with fewer than two words are returned stripped, without a
        dangling comma.
        """
        parts = name.split()
        if len(parts) < 2:
            return name.strip()
        return ', '.join([parts[-1], ' '.join(parts[:-1])])

    @staticmethod
    def _apply_cell_style(cell, style):
        """Copy the font, border, fill and alignment in *style* onto *cell*."""
        cell.font = style["font"]
        cell.border = style["border"]
        cell.fill = style["fill"]
        cell.alignment = style["alignment"]

    def _write_workbook(self, rows):
        """Fill the template with *rows* and save it to the build directory.

        *rows* is a sequence of ``(name, institution)`` pairs, written one per
        worksheet row starting at row 52.
        """
        wb = openpyxl.load_workbook(self.template)
        ws = wb.worksheets[0]
        ws.delete_rows(52, amount=3)  # removing the example rows
        # push the block below the table down to make room for the new rows
        ws.move_range("A52:E66", rows=len(rows), cols=0, translate=True)
        style_ref_cell = ws["B51"]
        template_cell_style = {
            "font": copy(style_ref_cell.font),
            "border": copy(style_ref_cell.border),
            "fill": copy(style_ref_cell.fill),
            "alignment": copy(style_ref_cell.alignment),
        }
        for row_num, (name, institution) in enumerate(rows, start=52):
            try:
                ws.unmerge_cells("A{0}:E{0}".format(row_num))
            except (ValueError, KeyError):
                # the range was not merged, so there is nothing to undo
                pass
            for col in ("A", "B", "C", "D", "E"):
                self._apply_cell_style(ws["{}{}".format(col, row_num)],
                                       template_cell_style)
            ws["A{}".format(row_num)].value = "A:"
            ws["B{}".format(row_num)].value = name
            ws["C{}".format(row_num)].value = institution
        ws.delete_rows(51)  # deleting the reference row
        wb.save(os.path.join(self.bldir, "coa_table.xlsx"))
175 changes: 175 additions & 0 deletions regolith/builders/recentcollabsbuilder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
"""Builder for recent-collaborator lists."""
import os
import datetime as dt
import sys
from copy import copy
from dateutil.relativedelta import relativedelta

try:
from bibtexparser.bwriter import BibTexWriter
from bibtexparser.bibdatabase import BibDatabase

HAVE_BIBTEX_PARSER = True
except ImportError:
HAVE_BIBTEX_PARSER = False

from regolith.tools import all_docs_from_collection, filter_publications, \
is_since, fuzzy_retrieval
from regolith.sorters import doc_date_key, ene_date_key, position_key
from regolith.builders.basebuilder import LatexBuilderBase, latex_safe

# Flags that look intended for the LaTeX command line. Nothing in the visible
# part of this module references them — NOTE(review): confirm whether this
# constant is still needed.
LATEX_OPTS = ["-halt-on-error", "-file-line-error"]


class RecentCollabsBuilder(LatexBuilderBase):
    """Render a recent-collaborators listing for one person.

    The person is taken from ``rc.people``. Authors of that person's
    publications from the last 48 months are matched against the people and
    contacts collections and handed to the ``recentcollabs.csv`` template.
    """

    btype = "recent-collabs"
    needed_dbs = ['citations', 'people', 'contacts', 'institutions']

    def construct_global_ctx(self):
        """Load the collections this builder needs into the global context."""
        super().construct_global_ctx()
        gtx = self.gtx
        rc = self.rc

        gtx["people"] = sorted(
            all_docs_from_collection(rc.client, "people"),
            key=position_key,
            reverse=True,
        )
        gtx["contacts"] = sorted(
            all_docs_from_collection(rc.client, "contacts"),
            key=position_key,
            reverse=True,
        )
        gtx["institutions"] = all_docs_from_collection(rc.client,
                                                       "institutions")
        gtx["citations"] = all_docs_from_collection(rc.client, "citations")
        gtx["all_docs_from_collection"] = all_docs_from_collection

    def latex(self):
        """Collect recent collaborators of ``rc.people[0]`` and render them.

        Exits with a friendly message when no person was given or the given
        name cannot be matched in the people collection.
        """
        rc = self.rc
        since_date = dt.date.today() - relativedelta(months=48)
        if isinstance(rc.people, str):
            rc.people = [rc.people]
        if not rc.people:
            sys.exit("please rerun specifying --people PERSON")
        # Kept under its own name: the collaborator lookups below must never
        # overwrite the person the report is being built for.
        target = fuzzy_retrieval(
            all_docs_from_collection(rc.client, "people"),
            ['aka', 'name', '_id'], rc.people[0],
            case_sensitive=False)
        if not target:
            sys.exit("please rerun specifying --people PERSON")
        for p in self.gtx["people"]:
            if p["_id"] != target["_id"]:
                continue
            my_names = frozenset(p.get("aka", []) + [p["name"]])
            pubs = filter_publications(self.gtx["citations"], my_names,
                                       reverse=True, bold=False)
            my_collabs = []
            for pub in pubs:
                if not is_since(pub.get("year"), since_date.year,
                                pub.get("month", 1), since_date.month):
                    continue
                month = pub.get("month")
                if not month:
                    print("WARNING: {} is missing month".format(pub["_id"]))
                # casefold the stored value (not the literal) so "TBD"
                # matches too
                if isinstance(month, str) and month.casefold() == "tbd":
                    print("WARNING: month in {} is tbd".format(pub["_id"]))
                my_collabs.extend(pub.get('author', []))
            ppl_names = []
            for collab in my_collabs:
                found = self._lookup_collaborator(collab)
                if found is not None:
                    ppl_names.append((found[0]["name"], found[1]))
            print(set(ppl_names))
            emp = p.get("employment", [{"organization": "missing",
                                        "begin_year": 2019}])
            emp.sort(key=ene_date_key, reverse=True)
            # NOTE(review): the original passed undefined ``names`` and
            # ``bibfile`` here; they are now the person's name set and the
            # generated .bib file (None without bibtexparser).
            bibfile = self.make_bibtex_file(pubs, pid=p["_id"],
                                            person_dir=self.bldir)
            self.render(
                "recentcollabs.csv",
                p["_id"] + ".csv",
                p=p,
                title=p.get("name", ""),
                pubs=pubs,
                names=my_names,
                bibfile=bibfile,
                employment=emp,
                collabs=my_collabs
            )
            # NOTE(review): a PDF step after rendering a .csv looks suspect;
            # kept unchanged — confirm it is intended.
            self.pdf(p["_id"])

    def _lookup_collaborator(self, collab):
        """Return ``(person_doc, institution)`` for a name, or None if unknown.

        People are searched first and take their institution from the first
        employment entry; contacts are the fallback and use their
        ``institution`` field. When the institution is not in the institutions
        collection the raw value is returned and a warning is printed.
        """
        client = self.rc.client
        person = fuzzy_retrieval(
            all_docs_from_collection(client, "people"),
            ["name", "aka", "_id"], collab)
        if person:
            # tolerate a missing or empty employment list
            employment = person.get("employment") or [
                {"organization": "missing"}]
            inst_key = employment[0].get("organization", "missing")
        else:
            person = fuzzy_retrieval(
                all_docs_from_collection(client, "contacts"),
                ["name", "aka", "_id"], collab)
            if not person:
                print("WARNING: {} not found in contacts. Check aka".format(
                    collab))
                return None
            inst_key = person.get("institution", "missing")
        inst = fuzzy_retrieval(
            all_docs_from_collection(client, "institutions"),
            ["name", "aka", "_id"], inst_key)
        if inst:
            return person, inst["name"]
        print("WARNING: {} missing from institutions".format(inst_key))
        return person, inst_key

    def filter_publications(self, authors, reverse=False):
        """Return citations that share an author with *authors*, date-sorted.

        Matching author names are wrapped in ``\\textbf{}``. Each returned
        entry is a shallow copy, so the stored citation documents are not
        modified and repeated calls do not nest the bold markup.
        """
        rc = self.rc
        pubs = []
        for pub in all_docs_from_collection(rc.client, "citations"):
            if not set(pub["author"]) & authors:
                continue
            pub = dict(pub)
            pub["author"] = [
                "\\textbf{" + a + "}" if a in authors else a
                for a in pub["author"]
            ]
            pubs.append(pub)
        pubs.sort(key=doc_date_key, reverse=reverse)
        return pubs

    def make_bibtex_file(self, pubs, pid, person_dir="."):
        """Write *pubs* to ``<person_dir>/<pid>.bib`` and return that path.

        Returns None when bibtexparser is not installed.
        """
        if not HAVE_BIBTEX_PARSER:
            return None
        # Nothing in this class creates the database/writer objects, so
        # create them on first use.
        if getattr(self, "bibdb", None) is None:
            self.bibdb = BibDatabase()
        if getattr(self, "bibwriter", None) is None:
            self.bibwriter = BibTexWriter()
        self.bibdb.entries = ents = []
        for pub in pubs:
            ent = dict(pub)
            ent["ID"] = ent.pop("_id")
            ent["ENTRYTYPE"] = ent.pop("entrytype")
            ent["author"] = " and ".join(ent["author"])
            ents.append(ent)
        fname = os.path.join(person_dir, pid) + ".bib"
        with open(fname, "w", encoding='utf-8') as f:
            f.write(self.bibwriter.write(self.bibdb))
        return fname
2 changes: 1 addition & 1 deletion regolith/dates.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
"dec.": 12,
"december": 12,
"": 1,
"tbd": 1,
"tbd": 1
}


Expand Down
Loading