sbillinge · SaniHarouna-Mayer · Jan 3, 2020 · Jan 3, 2020 · Jan 3, 2020 · Jan 5, 2020
diff --git a/.travis.yml b/.travis.yml
@@ -8,7 +8,7 @@ env:
 
 matrix:
   include:
-    - python: 3.6
+    - python: 3.8
 
 install:
   # Install conda

diff --git a/news/test.rst b/news/test.rst
diff --git a/regolith/builder.py b/regolith/builder.py
@@ -12,6 +12,7 @@
 from regolith.builders.resumebuilder import ResumeBuilder
 from regolith.builders.cpbuilder import CPBuilder
 from regolith.builders.figurebuilder import FigureBuilder
+from regolith.builders.coabuilder import RecentCollaboratorsBuilder
 
 
 BUILDERS = {
@@ -28,6 +29,7 @@
     "preslist": PresListBuilder,
     "reimb": ReimbursementBuilder,
     "figure": FigureBuilder,
+    "recent-collabs": RecentCollaboratorsBuilder,
 }
 
 

diff --git a/regolith/builders/coabuilder.py b/regolith/builders/coabuilder.py
@@ -0,0 +1,187 @@
+"""Builder for Resumes."""
+
+import datetime as dt
+import os
+import sys
+import openpyxl
+
+from regolith.builders.basebuilder import BuilderBase
+from regolith.dates import month_to_int
+from regolith.sorters import doc_date_key, ene_date_key, position_key
+from regolith.tools import all_docs_from_collection, filter_publications, \
+    month_and_year, fuzzy_retrieval, is_since
+from copy import copy
+from dateutil.relativedelta import relativedelta
+from operator import itemgetter
+
+
+NUM_MONTHS = 48  # look-back window, in months, used to decide which publications count as recent
+
+def mdy_date(month, day, year, **kwargs):
+    """Build a ``datetime.date``; a ``str`` month goes through ``month_to_int``."""
+    month_num = month_to_int(month) if isinstance(month, str) else month
+    return dt.date(year, month_num, day)
+
+
+def mdy(month, day, year, **kwargs):
+    """Format the date as ``MM/DD/YY`` (zero-padded, two-digit year)."""
+    mm, dd = str(month_to_int(month)).zfill(2), str(day).zfill(2)
+    return "{}/{}/{}".format(mm, dd, str(year)[-2:])
+
+
+class RecentCollaboratorsBuilder(BuilderBase):
+    """Build a recent-collaborators (COA) spreadsheet from database entries.
+
+    The single build command, ``excel``, collects the co-authors of the person
+    named by ``rc.people`` over the last ``NUM_MONTHS`` months and writes them
+    into a copy of the ``coa_template.xlsx`` template.
+    """
+
+    btype = "recent-collabs"
+    needed_dbs = ['citations', 'people', 'contacts', 'institutions']
+
+    def __init__(self, rc):
+        super().__init__(rc)
+        self.template = os.path.join(
+            os.path.dirname(os.path.dirname(__file__)), "templates", "coa_template.xlsx"
+        )
+        self.cmds = ["excel"]
+
+    def construct_global_ctx(self):
+        """Add the people, contacts, institutions and citations to ``self.gtx``."""
+        super().construct_global_ctx()
+        gtx = self.gtx
+        rc = self.rc
+
+        gtx["people"] = sorted(
+            all_docs_from_collection(rc.client, "people"),
+            key=position_key,
+            reverse=True,
+        )
+        gtx["contacts"] = sorted(
+            all_docs_from_collection(rc.client, "contacts"),
+            key=position_key,
+            reverse=True,
+        )
+        gtx["institutions"] = all_docs_from_collection(rc.client,
+                                                       "institutions")
+        gtx["citations"] = all_docs_from_collection(rc.client, "citations")
+        gtx["all_docs_from_collection"] = all_docs_from_collection
+
+    @staticmethod
+    def _recent_coauthors(pubs, since_date):
+        """Return the authors of every pub that passes ``is_since`` for *since_date*.
+
+        Repeated names are dropped; a warning is printed for a missing/tbd month.
+        """
+        authors = []
+        for pub in pubs:
+            if not is_since(pub.get("year"), since_date.year,
+                            pub.get("month", 1), since_date.month):
+                continue
+            month = pub.get("month")
+            if not month:
+                print("WARNING: {} is missing month".format(pub["_id"]))
+            elif str(month).casefold() == "tbd":
+                # casefold the stored value (not the literal) so "TBD" matches
+                print("WARNING: month in {} is tbd".format(pub["_id"]))
+            authors.extend(pub.get('author', []))
+        # drop repeats, keeping first-seen order: one lookup per co-author
+        return list(dict.fromkeys(authors))
+
+    @staticmethod
+    def _collab_row(collab, people, contacts, institutions):
+        """Return ``("Last, First", institution)`` for *collab*, or None if unknown.
+
+        *collab* is searched in *people* first, then in *contacts*; the
+        institution name is normalised through *institutions* when possible.
+        """
+        keys = ["name", "aka", "_id"]
+        doc = fuzzy_retrieval(people, keys, collab)
+        if doc:
+            # people entries: take the organization of the first listed job
+            jobs = doc.get("employment") or [{}]
+            inst_key = jobs[0].get("organization", "missing")
+        else:
+            doc = fuzzy_retrieval(contacts, keys, collab)
+            if not doc:
+                print("WARNING: {} not found in contacts. Check aka".format(
+                    collab))
+                return None
+            # .get(): a contact without an institution must not raise KeyError
+            inst_key = doc.get("institution", "missing")
+        inst = fuzzy_retrieval(institutions, keys, inst_key)
+        if inst:
+            inst_name = inst["name"]
+        else:
+            inst_name = inst_key
+            print("WARNING: {} missing from institutions".format(inst_key))
+        # reformat "First Middle Last" as "Last, First Middle"
+        parts = (doc.get("name") or collab).split()
+        if len(parts) > 1:
+            return ', '.join([parts[-1], ' '.join(parts[:-1])]), inst_name
+        return ' '.join(parts), inst_name
+
+    def excel(self):
+        """Write ``coa_table.xlsx`` in ``self.bldir`` for the person in ``rc.people``.
+
+        Exits via ``sys.exit`` when no person is given or the name matches
+        nobody in the people collection.
+        """
+        rc = self.rc
+        since_date = dt.date.today() - relativedelta(months=NUM_MONTHS)
+        if isinstance(getattr(rc, "people", None), str):
+            rc.people = [rc.people]
+        wanted = getattr(rc, "people", None)
+        # Materialise each collection once; they are re-scanned per co-author.
+        people = list(all_docs_from_collection(rc.client, "people"))
+        contacts = list(all_docs_from_collection(rc.client, "contacts"))
+        institutions = list(all_docs_from_collection(rc.client, "institutions"))
+        me = fuzzy_retrieval(people, ['aka', 'name', '_id'], wanted[0],
+                             case_sensitive=False) if wanted else None
+        if not me:
+            sys.exit("please rerun specifying --people PERSON")
+        # The target keeps its own name (``me``): co-author lookups below must
+        # never rebind it.
+        my_names = frozenset(me.get("aka", []) + [me["name"]])
+        pubs = filter_publications(self.gtx["citations"], my_names,
+                                   reverse=True, bold=False)
+        rows = set()
+        for collab in self._recent_coauthors(pubs, since_date):
+            row = self._collab_row(collab, people, contacts, institutions)
+            if row is not None:
+                rows.add(row)
+        # sort on (name, institution) so the output order is reproducible
+        ppl_sorted = sorted(rows, key=lambda r: (r[0], str(r[1])))
+        self._write_workbook(ppl_sorted)
+
+    def _write_workbook(self, rows):
+        """Fill the template with one ``A:`` line per (name, institution) row.
+
+        *rows* is written in the order given; the result is saved as
+        ``coa_table.xlsx`` in ``self.bldir``.
+        """
+        ref_row = 51  # template row whose style is cloned; removed at the end
+        first_row = ref_row + 1  # first of the template's three example rows
+        wb = openpyxl.load_workbook(self.template)
+        ws = wb.worksheets[0]
+        ws.delete_rows(first_row, amount=3)  # removing the example rows
+        # shift the cells that follow by len(rows) rows to make room
+        ws.move_range("A52:E66", rows=len(rows), cols=0, translate=True)
+        ref_cell = ws["B{}".format(ref_row)]
+        style = {attr: copy(getattr(ref_cell, attr))
+                 for attr in ("font", "border", "fill", "alignment")}
+        for row_num, (name, inst) in enumerate(rows, start=first_row):
+            try:
+                ws.unmerge_cells("A{0}:E{0}".format(row_num))
+            except (ValueError, KeyError):
+                pass  # best effort: nothing to unmerge on this row
+            for col in "ABCDE":
+                cell = ws["{}{}".format(col, row_num)]
+                for attr, value in style.items():
+                    setattr(cell, attr, value)
+            ws["A{}".format(row_num)].value = "A:"
+            ws["B{}".format(row_num)].value = name
+            ws["C{}".format(row_num)].value = inst
+        ws.delete_rows(ref_row)  # deleting the reference row
+        wb.save(os.path.join(self.bldir, "coa_table.xlsx"))
diff --git a/regolith/builders/recentcollabsbuilder.py b/regolith/builders/recentcollabsbuilder.py
@@ -0,0 +1,175 @@
+"""Builder for publication lists."""
+import os
+import datetime as dt
+import sys
+from copy import copy
+from dateutil.relativedelta import relativedelta
+
+try:
+    from bibtexparser.bwriter import BibTexWriter
+    from bibtexparser.bibdatabase import BibDatabase
+
+    HAVE_BIBTEX_PARSER = True
+except ImportError:
+    HAVE_BIBTEX_PARSER = False
+
+from regolith.tools import all_docs_from_collection, filter_publications, \
+    is_since, fuzzy_retrieval
+from regolith.sorters import doc_date_key, ene_date_key, position_key
+from regolith.builders.basebuilder import LatexBuilderBase, latex_safe
+
+LATEX_OPTS = ["-halt-on-error", "-file-line-error"]  # NOTE(review): not referenced elsewhere in this module -- confirm it is needed
+
+
+class RecentCollabsBuilder(LatexBuilderBase):
+    """Render the recent collaborators of the person named by ``rc.people``.
+
+    NOTE(review): ``btype`` duplicates ``RecentCollaboratorsBuilder`` in
+    ``coabuilder.py`` -- confirm this builder is still needed.
+    """
+
+    btype = "recent-collabs"
+    needed_dbs = ['citations', 'people', 'contacts', 'institutions']
+
+    def construct_global_ctx(self):
+        """Add the people, contacts, institutions and citations to ``self.gtx``."""
+        super().construct_global_ctx()
+        gtx = self.gtx
+        rc = self.rc
+
+        gtx["people"] = sorted(
+            all_docs_from_collection(rc.client, "people"),
+            key=position_key,
+            reverse=True,
+        )
+        gtx["contacts"] = sorted(
+            all_docs_from_collection(rc.client, "contacts"),
+            key=position_key,
+            reverse=True,
+        )
+        gtx["institutions"] = all_docs_from_collection(rc.client,
+                                                       "institutions")
+        gtx["citations"] = all_docs_from_collection(rc.client, "citations")
+        gtx["all_docs_from_collection"] = all_docs_from_collection
+
+    @staticmethod
+    def _recent_coauthors(pubs, since_date):
+        """Return all authors (repeats kept) of pubs passing ``is_since`` for *since_date*."""
+        authors = []
+        for pub in pubs:
+            if not is_since(pub.get("year"), since_date.year,
+                            pub.get("month", 1), since_date.month):
+                continue
+            month = pub.get("month")
+            if not month:
+                print("WARNING: {} is missing month".format(pub["_id"]))
+            elif str(month).casefold() == "tbd":
+                # casefold the stored value (not the literal) so "TBD" matches
+                print("WARNING: month in {} is tbd".format(pub["_id"]))
+            authors.extend(pub.get('author', []))
+        return authors
+
+    @staticmethod
+    def _collab_row(collab, people, contacts, institutions):
+        """Return ``(name, institution)`` for *collab*, or None when it is unknown."""
+        keys = ["name", "aka", "_id"]
+        doc = fuzzy_retrieval(people, keys, collab)
+        if doc:
+            # people entries: take the organization of the first listed job
+            jobs = doc.get("employment") or [{}]
+            inst_key = jobs[0].get("organization", "missing")
+        else:
+            doc = fuzzy_retrieval(contacts, keys, collab)
+            if not doc:
+                print("WARNING: {} not found in contacts. Check aka".format(collab))
+                return None
+            # .get(): a contact without an institution must not raise KeyError
+            inst_key = doc.get("institution", "missing")
+        inst = fuzzy_retrieval(institutions, keys, inst_key)
+        if inst:
+            return doc["name"], inst["name"]
+        print("WARNING: {} missing from institutions".format(inst_key))
+        return doc["name"], inst_key
+
+    def latex(self):
+        """Render ``recentcollabs.csv`` for the person named by ``rc.people``.
+
+        Exits via ``sys.exit`` when no person is given or nobody matches.
+        """
+        rc = self.rc
+        since_date = dt.date.today() - relativedelta(months=48)
+        if isinstance(getattr(rc, "people", None), str):
+            rc.people = [rc.people]
+        wanted = getattr(rc, "people", None)
+        people = list(all_docs_from_collection(rc.client, "people"))
+        contacts = list(all_docs_from_collection(rc.client, "contacts"))
+        institutions = list(all_docs_from_collection(rc.client, "institutions"))
+        me = fuzzy_retrieval(people, ['aka', 'name', '_id'], wanted[0],
+                             case_sensitive=False) if wanted else None
+        if not me:
+            sys.exit("please rerun specifying --people PERSON")
+        # the target keeps its own name (``me``); co-author lookups never rebind it
+        my_names = frozenset(me.get("aka", []) + [me["name"]])
+        pubs = list(filter_publications(self.gtx["citations"], my_names,
+                                        reverse=True, bold=False))
+        my_collabs = self._recent_coauthors(pubs, since_date)
+        found = (self._collab_row(c, people, contacts, institutions)
+                 for c in dict.fromkeys(my_collabs))  # one lookup per distinct name
+        ppl_names = [row for row in found if row is not None]
+        print(set(ppl_names))
+        # sorted() rather than .sort(): leave the stored employment list alone
+        emp = sorted(me.get("employment", [{"organization": "missing",
+                                            "begin_year": 2019}]),
+                     key=ene_date_key, reverse=True)
+        bibfile = self.make_bibtex_file(pubs, pid=me["_id"],
+                                        person_dir=self.bldir)
+        self.render(
+            "recentcollabs.csv",
+            me["_id"] + ".csv",
+            p=me,
+            title=me.get("name", ""),
+            pubs=pubs,
+            names=my_names,
+            bibfile=bibfile,
+            employment=emp,
+            collabs=my_collabs
+        )
+        # NOTE(review): a .csv is rendered above yet a PDF build follows --
+        # confirm ``self.pdf`` is wanted here.
+        self.pdf(me["_id"])
+
+    def filter_publications(self, authors, reverse=False):
+        """Return the citations written by any of *authors*, sorted with ``doc_date_key``.
+
+        Matching names are wrapped in ``\\textbf{}`` on a copy of each
+        citation, so the stored documents are left untouched.
+        """
+        pubs = []
+        for pub in all_docs_from_collection(self.rc.client, "citations"):
+            pub_authors = pub.get("author", [])
+            if not set(pub_authors) & authors:
+                continue
+            pub = dict(pub)  # shallow copy: never rewrite the shared document
+            pub["author"] = ["\\textbf{" + a + "}" if a in authors else a
+                             for a in pub_authors]
+            pubs.append(pub)
+        pubs.sort(key=doc_date_key, reverse=reverse)
+        return pubs
+
+    def make_bibtex_file(self, pubs, pid, person_dir="."):
+        """Write *pubs* to ``<person_dir>/<pid>.bib``; return the path, or None without bibtexparser."""
+        if not HAVE_BIBTEX_PARSER:
+            return None
+        if not hasattr(self, "bibdb"):  # created on first use if not yet set
+            self.bibdb, self.bibwriter = BibDatabase(), BibTexWriter()
+        self.bibdb.entries = ents = []
+        for pub in pubs:
+            ent = dict(pub)
+            ent["ID"] = ent.pop("_id")
+            ent["ENTRYTYPE"] = ent.pop("entrytype")
+            ent["author"] = " and ".join(ent["author"])
+            ents.append(ent)
+        fname = os.path.join(person_dir, pid) + ".bib"
+        with open(fname, "w", encoding='utf-8') as f:
+            f.write(self.bibwriter.write(self.bibdb))
+        return fname
diff --git a/regolith/dates.py b/regolith/dates.py
@@ -40,7 +40,7 @@
     "dec.": 12,
     "december": 12,
     "": 1,
-    "tbd": 1,
+    "tbd": 1
 }