diff --git a/.gitignore b/.gitignore index 5b77a2a4..1b96a834 100644 --- a/.gitignore +++ b/.gitignore @@ -179,4 +179,6 @@ src/marvin/_version.py # Prefect .prefect/ .prefect/*.json -.prefectignore \ No newline at end of file +.prefectignore + +.idea/ \ No newline at end of file diff --git a/README.md b/README.md index ef51522d..e5c4ad5e 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,8 @@ +# IMPORTANT + +This is a fork of Marvin for my research. I don't expect this to have wider adoption beyond my own research. Please see [here](ResearchReadMe.md) for some more information. + +

diff --git a/ResearchReadMe.md b/ResearchReadMe.md new file mode 100644 index 00000000..39fe3c85 --- /dev/null +++ b/ResearchReadMe.md @@ -0,0 +1,363 @@ +# LLM Language Features + +## What I have done so far + +- Higher Order Return Types + + + + + + + + +## Motivations +Recently, there have been a myriad of libraries that provide utilities for developers to integrate LMs into their programmatic environments. However, there are few experiments emphasizing how LLMs could be _organically_ integrated into a programming language. It's been envisioned by many that the field of software engineering will be revolutionized by integration with LLM and other generative AI technologies. At the macroscopic level, libraries have imposed/provided structures for prompting. However, there are few more microscopic structures that is more seamlessly integrated into programming itself. + +Of course, everything so far we are proposing for this library can be simulated with straight function calls to an LM, but we are hoping the added convenience will be helpful for developers. + +The library, for the foreseeable future, will probably be mainly be used for the [Composable System Group](https://github.com/composablesys) and [myself](https://github.com/peteryongzhong) as a way for experimentation, with the goal that if the experiments prove interesting, more energy will be devoted to make it production ready. The design of the library probably initially will lack a holistic design, but rather a collection of features. As I expand this, I am hoping there will be more holistic design opportunities that open up. + +## Features Overview + +Before any of the ideas are described in this section, we wish to cite [DSPy](https://github.com/stanfordnlp/dspy) and [Marvin](https://github.com/PrefectHQ/marvin), as they inspired many of the features contained herein. In pa + +### Natural Language Functions (TODO) + +Functions are the most basic unit in which a programmer could interact with the LM. 
We take much inspiration from Marvin in this case since we feel their design decisions are very intuitive. + +```python + +import lmlang + +@lmlang.fn +def cuisine_recs(spiciness : int, food_likes: list[str] ) -> str: + lmlang.input_describe(spiciness, "the spice tolerance") + lmlang.input_describe(food_likes, "a list of foods that are currently liked") + lmlang.output_describe("cuisine that is good to try") +``` + +```python +import lmlang + +@lmlang.fn +def cuisine_recs(spiciness : int, food_likes: list[str] ) -> str: + """ + What cuisine is good to try given the `spiciness` and `food_likes` which denote my preferences right now? + """ +``` + + +#### Why not higher order as well? (TODO) + +In programming, especially in a functional programming setting, programmers use higher-ordered functions to specialize the behavior of their program by being more abstract. There is no reason why the same design patterns could not be extended to LMs. + +One understanding of a prompt is a high level fuzzy specification for some high level natural computation process, such as the one shown in the previous example. We could extend the same process here. + +In this example, let's assume that the CustomerProfile class and the Product class will have some descriptions, the tool should automatically integrate them into the prompt. + +```python +@lmlang.fn +def rating_for_customer(customer_profile: CustomerProfile) -> Callable[[Product], int]: + """ + :returns: A function from a product to a rating that the customer would likely give based on their profile + """ +``` + +Another use case of higher-ordered functions might be through the use of a function as an argument to a natural language function. + +Often the argument that is a function can essentially serve as some "tool" that the LM has access to. This tool could be something that is defined as a standard programmatic function, but it's conceivable that it might also be another LM powered function. 
+ +```python +@lmlang.fn +def outfit_match(items_of_clothing: List[Clothes], client_comment : Callable[[Clothes], str], scenario:str) -> List[Clothes]: + """ + As a fashion designer, for a given `scenario` please pick a suitable outfit from the `items_of_clothing`, taking into account \ + the `client_comment`. + """ +``` + +```python +@lmlang.fn +def landing_airport(airports : List[Airport], weather: Callable[[Airport], str]) -> Airport: + """ + I am a VFR student pilot, where should I land without breaking FAA regulations? + + :param airports: List of Airports that I am considering landing + :param weather: A function that returns the current weather conditions (METAR) and (TAF) at a particular airport + :returns: The airport that I should land at + """ +``` + + +#### Natural Predicate (TODO) + +This serves as sugar for Natural Language functions, but it can be really helpful in some cases. + +```python +ls = [dress1, dress2, dress3 ....] + +filter(lmlang.predicate("Dresses suitable for formal occasions."), ls) +``` + +At times, it will be helpful to have an explanation for why a predicate succeeded or failed. + + +```python +ls = [dress1, dress2, dress3 ....] 
+ +filter(lmlang.explainable_predicate("Dresses suitable for formal occasions."), ls) +# behave the same as the previous example + +result = lmlang.explainable_predicate("Dresses suitable for formal occasions.")(casual_dress) # return an lmlang.Explainable Object +if not result: + print("dress is not suitable") # triggered here + print(result.reason ) + +``` + +or other convenient functions such as: + +```python +res = lmlang.quickfunc("what is the earth's only satellite?") #the moon +air_space = AirSpace.ClassD +res = lmlang.quickfunc("What is the minimum visibility in a VFR flight from this class of airspace during the day in statute miles", air_space) #3 +``` + + +#### Natural Contracts (TODO) + +Software contracts are a very good way of ensuring invariants are not broken. Especially during dev time, and even in production, contracts help programmers by concisely specifying the pre and post conditions. Sometimes these conditions can be spelled out programmatically, say using a contract library such as [PyContract](https://andreacensi.github.io/contracts/) would be quite nice in terms of this specification. + +Unfortunately, PyContract has long since fallen out of maintenance. But Pydantic is a library that has many intersecting features with PyContract that could be helpful. + +This is especially helpful in the context of https://typing.readthedocs.io/en/latest/spec/qualifiers.html#annotated and https://docs.pydantic.dev/latest/concepts/types/#custom-types, which allows for the annotation of custom typing constraints. We propose combining such annotations and type checking validation capabilities of Pydantic and the introduction of LLM as a special validator into the contract system. 
+ +```python +PositiveInt = Annotated[int, Gt(0)] +RespectfulComment = Annotated[str, lmlang.annotate("Must be respectful")] + +@contract +def process_comment(times : PositiveInt, comment: RespectfulComment) -> bool: + pass +``` +At times we might need compositional constraints +```python +PositiveInt = Annotated[int, Gt(0)] +RespectfulComment = Annotated[str, lmlang.annotate("Must be respectful")] + +@contract(precond=lmlang.compositional_contract("comment", "reply", "the reply to the comment must be related and not off topic")) +def process_comment(comment: RespectfulComment, reply:str) -> bool: + pass +``` +For post conditions it will be very similar, where the LM could evaluate if the function produced the correct result based on either just the result +or the result parameterized by arguments. + +### Code Generation + +Code Generation refers to the process by which a language model, rather than directly producing the answer to some question, produces a code segment that achieves the desired outcomes. A very related concept in prompting is the notion of [Program of Thoughts](https://arxiv.org/abs/2211.12588). Especially for predictable tasks, the LM may be able to understand the high level abstract computational notion of the desired task, better than the actual calculation, which may involve numerical tasks which the LMs tend to perform poorly on. + +We have already seen how we could wrap the LM around a function that is governed by its natural language descriptions above. The interface exposes a direct opportunity for code generation. 
+ +```python +import lmlang + +@lmlang.fn(codegen=True) +def user_eligible(user: User ) -> bool: + lmlang.func_describe("This function should check if the user is eligible for discount by checking if it's either a student or teacher and has never had an account delinquency") +``` +The LM might generate code to the effect of: + +```python + +def user_eligible(user: User ) -> bool: + if user.status not in ["student", "teacher"]: + return False + return len(user.delinquencies) == 0 +``` + +Or sometimes we need to instruct the LM for a partial generation if the structure of the code is fixed, but there are "holes" where the programmer only specifies natural language descriptions: + +```python +import lmlang + +@lmlang.partial(codegen=True) +def send_marketing_emails(users: List[User]) -> bool: + final_user = [] + for user in users: + lmlang.incontext_gen("if the `user` is eligible for discount by checking if it's either a student or teacher and has never had an account delinquency, add it to the `final_user` list") + lmlang.incontext_gen("send an email using the template EmailTemplate.Marketing to all the `final_user` informing they got a promotion.") +``` + +While this is something that could be done at runtime and memoized for future use, it's not really helpful since the programmer would likely wish to refine the generated code. + +We propose that in addition to the runtime behavior, we support integration with popular IDEs such as VScode and/or PyCharm that could allow the user to directly view the generated code live and refine it as they code. + +Furthermore, as software engineers, we recognize the importance of unit testing in the developmental process. In recent LM research results, there have also been very encouraging results that demonstrate the ability for unit tests to augment the generation process. 
In recognition of these traits, we believe the programmer should have the ability to use unit testing both to leverage it for correctness assessments, but also for the generation process. + +```python +@lmlang.fn(codegen=True, unittest=Tests.UserEligibleTest) +def user_eligible(user: User ) -> bool: + lmlang.func_describe("This function should check if the user is eligible for discount by checking if it's either a student or teacher and has never had an account delinquency") +``` + +Even if you do not provide `codegen=True`, the library can still "try" to take advantage of the unittest by using it as demonstrations in the ICL sense: + +```python +@lmlang.fn(unittest=Tests.UserEligibleTest) +def user_eligible(user: User ) -> bool: + lmlang.func_describe("This function should check if the user is eligible for discount by checking if it's either a student or teacher and has never had an account delinquency") +``` + + +### Natural Language Types And Natural Language Specs For Types + +"Types" and data models are concepts that are intimately familiar to developers. However, traditional types are rigid, and for the purposes of LM interactions, can be quite restrictive. This could be a result of the fact that unstructured natural language data is forced and serialized into structured format. Whilst such structure is certainly desirable, for the intermediate LM steps, it could be useful to allow the LM to be a bit more unstructured. + +What does this mean in terms of concrete class design? + +We first propose to introduce a contract system for classes built upon Pydantic that augments the existing type annotation validation system with the ability to specify natural language constraints. Secondly, we propose that classes should have an optional natural language metadata, which, if generated through some intermediate process, could be populated by the LM for future use. + +This is partially achieved through the annotations shown above which allows for natural language descriptions. 
However, we could extend this by allowing for interfield dependencies to be specified, both in natural language and programmatically: + +```python +@classcontract{ + [lmlang.compositional_contract("plane_model", "airport", "the airport must be big enough to support the plane")] +} +class Pilot: + """ + A pilot is a class describing a pilot who flies at an airport + """ + id: int + name : str + plane_model : str + airport : str +``` + +An idea that naturally falls out of this setup is the notion of a natural language inheritance scheme: + +```python +@classcontract{ + [lmlang.compositional_contract("plane_model", "The plane_model must be a big plane with jet engines")] +} +class BigBoyPilot(Pilot): + pass +``` + +Why would this be helpful? The natural language classification of object hierarchy could be helpful for moderating the functions located in the subclass. In effect, with the understanding that the natural language restriction is in place, it opens the function up to more possibilities since it's a more constrained set of values. + +#### Casting + +For users of C++, the concept of dynamic casting, while somewhat cursed, has value from a software engineering perspective. + +LMs could moderate the process of casting and dynamic casts. + +For instance: +```python +lmlang.try_cast(cessna_172_pilot, BigBoyPilot ) # None +lmlang.try_cast(boeing747_pilot, BigBoyPilot) # a BigBoyPilot object +``` +We could even use natural language casting as a way to extract information by converting from one model to another model. + +```python +class Food: + brand :str + ingredient: List[str] + # ... + +class Nutrition: + """Whether it's a good source of nutrients""" + vitaminA: bool + calcium :bool + # .... + +lmlang.try_cast(huel, Nutrition, "Provide a nutritional breakdown of the food please.") +``` +#### Selecting + +Sometimes the property a programmer wants is not necessarily the property that is explicitly specified. 
+ +Therefore it could be helpful to use natural language commands as if you are getting a property. + +```python +cessna_172.ai_property("Number of Engines") # return 1 +boeing737max.ai_property("Number of Emergency Exits") # undefined behavior +``` + +### Natural Language Pattern Matching + +Augment the syntax of Python's pattern matching to use natural language. + +Python's current pattern matching syntax and semantics are described in detail [here](https://docs.python.org/3/reference/compound_stmts.html#match). The proposed syntax here will likely need to run through a source-to-source pre-compilation that will ensure the compiled code adheres to the features that are currently available. + +We will use `a` as a prefix for AI interpreted matching. + +For instance: +```python +user_command = "I want to turn the lamp on please thanks!:)" + +match user_command: + case a'Lights on': + pass + case a'Lights off': + pass + case a'Speaker On': + pass + # ..... Other cases +``` +An extremely powerful concept in functional programming pattern matching that has been adopted by the Python community is the ability to extract information from a particular object through predefined patterns. Right now, these patterns are all programmatic patterns, but with LMs, we could introduce natural language decompositions. +```python +match user_command: + case a'Volume increase by {volume_up} units': + volume += volume_up + case a'Lights on': + pass + case a'Lights off': + pass + case a'Speaker On': + pass + # ..... Other cases +``` + +```python +match transportation_devices: + case Plane(a'Number of Engine' as engine_num): + pass + case Cars(a'Number of Cylinders' as cylinder_num): + pass +``` + +Of course, this can be combined with the try_cast semantics earlier: + + +```python +match food: + case CastedType(Nutrient(vitaminA = va), "Provide a nutritional breakdown of the food please."): + pass + +``` + +### Mocking and Unit-testing + +IDE support is critical here. 
+ +#### Mocking and Synthetic Generation + +Synthetically generating + +```python +lmlang.mock(User, "Adam who is great at swimming") + +``` + +#### Direct unit test generation + +Cite 10.1109/TSE.2023.3334955 . Syntax TBD, but developers should specify the function they want to test and some properties in natural language. + +### What can we do with loops? + +Who knows + +## Prompt Agnostic and Prompting Technique incorporation + +We are thinking maybe we could use `with` blocks to specify concrete strategies like COT? diff --git a/final_report_prompt_engineering1.ipynb b/final_report_prompt_engineering1.ipynb new file mode 100644 index 00000000..ff4b8eaa --- /dev/null +++ b/final_report_prompt_engineering1.ipynb @@ -0,0 +1,1297 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "a52d86708ef45426", + "metadata": { + "collapsed": false + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "5ff73436200e7230", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-28T05:35:00.970368Z", + "start_time": "2024-04-28T05:35:00.601295Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "from dotenv import load_dotenv\n", + "from pydantic import Field\n", + "\n", + "load_dotenv()\n", + "import marvin\n", + "from marvin.settings import temporary_settings\n", + "from typing import Annotated, List, Callable\n", + "from annotated_types import Predicate" + ] + }, + { + "cell_type": "markdown", + "id": "24fe46583cac36dd", + "metadata": { + "collapsed": false + }, + "source": [ + "# Final Report\n", + "Peter Yong Zhong\n", + "Prompt Engineering Research Thoughts and Directions\n", + "Do not redistribute \n", + "Warning: The report may contain explicit language in some examples to illustrate certain topics of importance and serving strictly for demonstration purposes. 
\n", + "\n", + "## Abstract\n", + "\n", + "This paper explores the integration of Large Language Models (LLMs) into software engineering, focusing on developing language features suited for a post-LLM environment. We propose new programming constructs, including Higher Order Types and Functions, Contracts, and Semantic Pattern Matching, alongside the innovative concept of 'Natural Language Types' to bridge structured software needs with the fuzziness typical of LLMs. Our approach emphasizes a bottom-up design philosophy, enhancing traditional programming patterns with AI capabilities to address both the abstractness and computational demands of integrating LLMs. This research contributes to the understanding and evolution of programming abstractions in LLM-enhanced environments, aiming to make LLMs more accessible and functional for developers.\n", + "\n", + "## Introduction\n", + "\n", + "\n", + "Large Language Models (LLMs) have significantly impacted software engineering, often heralded as a revolutionary force in how software engineers develop, test, and deploy code. The influence of LLMs primarily extends to two areas: firstly, the development and generation of executable code[1][2], and secondly, integrating LLM calls into other software systems' logic. In the latter case, developers either manually manipulate strings based on prompt templates and directly use foundation model APIs, or they utilize popular frameworks to meet their LLM requirements. The diversity in LLM programming abstractions has spurred active research. For example, the recently proposed Language Model System Interface Model (LMSI)[3] stratifies these abstractions into seven layers of increasing abstractness. Researchers have identified five families of LLM abstractions emerging within popular frameworks.\n", + "\n", + "However, many frameworks emphasize the machine learning aspects of LLMs, focusing on novel prompting techniques[4], pipeline designs[5], and efficient inference[6]. 
We argue that in scenarios where LLMs are *progressively* integrated into existing systems, a bottom-up approach should be adopted. This approach should prioritize programming concerns and patterns, using LLMs to enhance these patterns with powerful generalization and computational analysis capabilities.\n", + "\n", + "Developers typically interact with LLMs at the programming language level. Similar to paradigms like Object-Oriented Programming[7], Async-Await programming[8], and Object-Relational Mappers[9], language features have enabled developers to express business logic more abstractly. The compiler then 'desugars' these constructs into a lower-level implementation that can be complex. In this project, we explore designing such language features in a post-LLM environment to understand the challenges and opportunities they present.\n", + "\n", + "We have chosen to augment Marvin[10], a popular LLM framework that aligns with our philosophy of empowering developers focused on using AI rather than building it. Marvin introduces several coding constructs for data transformation, entity extraction, classification, and AI functions, which serve as a foundation for our designs.\n", + "\n", + "Our contributions are threefold. First, we propose and implement a set of language features enhanced by LLMs, providing an open-source implementation under the Apache license. These features include Higher Order Types and Functions, Contracts, and Semantic Pattern Matching. Second, we introduce a novel concept of 'Natural Language Types', blending structured software needs with the fuzziness typical of LLMs. 
Lastly, we outline a novel type-driven and unit-tests-driven code generation methodology that, while not yet implemented, lays the groundwork for future developments.\n", + " " + ] + }, + { + "cell_type": "markdown", + "id": "7c47992a1cc8614e", + "metadata": { + "collapsed": false + }, + "source": [ + "## Natural Language Types\n", + "\n", + "In programming, a type serves two crucial interrelated functions. First, it defines the schema of a data model by specifying the required information an object of that type should hold and setting constraints on the nature or \"type\" of each piece of data. These constraints are checked either statically or dynamically, ensuring that when a program interacts with an object of a given type, it can appropriately access and manipulate its fields. Second, in object-oriented environments, the conceptualization of types mirrors the natural language reasoning about physical objects. This intuitive alignment between programming types and natural language facilitates understanding, particularly beneficial when working with LLMs trained on human language data.\n", + "\n", + "The concept of Natural Language Types is straightforward yet innovative, primarily consisting of two elements. Traditional data types like int and string often fall short in describing complex, fuzzy constraints prevalent in natural language environments. Moreover, traditional typing systems struggle to address the interdependencies between different fields. Thus, the first component of Natural Language Types involves defining constraints at both the field level and between fields using natural language. This feature is vital as it underpins the Natural Language contract system discussed later in this work.\n", + " " + ] + }, + { + "cell_type": "markdown", + "id": "94944b830930725a", + "metadata": { + "collapsed": false + }, + "source": [ + "We can explore this idea through an example below. Here, let's start off with a simple, traditional type - Pilot. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "2d60296dbd80fe5a", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-27T21:39:46.875275Z", + "start_time": "2024-04-27T21:39:46.868480Z" + }, + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Pilot(other_information=None, id=1, name='Noah Tabuex', plane_model='Cessna 172', certificate='PPL', airport='KPIT')" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "class Pilot(marvin.NaturalLangType):\n", + " id: int\n", + " name: str\n", + " plane_model: str\n", + " certificate: str\n", + " airport: str\n", + "\n", + "Pilot(id = 1, name=\"Noah Tabuex\", plane_model=\"Cessna 172\", certificate=\"PPL\", airport = \"KPIT\")" + ] + }, + { + "cell_type": "markdown", + "id": "e18c33466ea3e462", + "metadata": { + "collapsed": false + }, + "source": [ + "With Natural Language Types, we should be able to use natural language to specify certain constraints. Here in this case, we could rely on the internal knowledge of the LM to validate the result. Here we are using `temporary_setting` to enable the contract capabilities. We chose this design since we argue that contracts should mainly be a develop time artifact that should be disabled in production to minimize performance degradations." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "47e989aeb0678326", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-27T22:16:10.811178Z", + "start_time": "2024-04-27T22:16:10.804482Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "class Pilot(marvin.NaturalLangType):\n", + " id: int\n", + " name: str\n", + " plane_model: Annotated[str, Predicate(marvin.val_contract(\"Plane must contain more than 1 engine\"))]\n", + " certificate: str\n", + " airport: str" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "417cfcdb0cd31261", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-27T21:44:57.919134Z", + "start_time": "2024-04-27T21:44:57.299087Z" + }, + "collapsed": false + }, + "outputs": [ + { + "ename": "ValidationError", + "evalue": "1 validation error for Pilot\nplane_model\n Predicate val_contract..wrapper failed [type=predicate_failed, input_value='Cessna 172', input_type=str]", + "output_type": "error", + "traceback": [ + "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m", + "\u001B[0;31mValidationError\u001B[0m Traceback (most recent call last)", + "Cell \u001B[0;32mIn[7], line 9\u001B[0m\n\u001B[1;32m 6\u001B[0m airport: \u001B[38;5;28mstr\u001B[39m\n\u001B[1;32m 8\u001B[0m \u001B[38;5;28;01mwith\u001B[39;00m temporary_settings(ai__text__disable_contract\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mFalse\u001B[39;00m):\n\u001B[0;32m----> 9\u001B[0m Pilot(\u001B[38;5;28mid\u001B[39m \u001B[38;5;241m=\u001B[39m \u001B[38;5;241m1\u001B[39m, name\u001B[38;5;241m=\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mNoah Tabuex\u001B[39m\u001B[38;5;124m\"\u001B[39m, plane_model\u001B[38;5;241m=\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mCessna 172\u001B[39m\u001B[38;5;124m\"\u001B[39m, certificate\u001B[38;5;241m=\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mPPL\u001B[39m\u001B[38;5;124m\"\u001B[39m, airport 
\u001B[38;5;241m=\u001B[39m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mKPIT\u001B[39m\u001B[38;5;124m\"\u001B[39m)\n", + "File \u001B[0;32m~/anaconda3/envs/marvin-fork/lib/python3.11/site-packages/pydantic/main.py:171\u001B[0m, in \u001B[0;36mBaseModel.__init__\u001B[0;34m(self, **data)\u001B[0m\n\u001B[1;32m 169\u001B[0m \u001B[38;5;66;03m# `__tracebackhide__` tells pytest and some other tools to omit this function from tracebacks\u001B[39;00m\n\u001B[1;32m 170\u001B[0m __tracebackhide__ \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mTrue\u001B[39;00m\n\u001B[0;32m--> 171\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m__pydantic_validator__\u001B[38;5;241m.\u001B[39mvalidate_python(data, self_instance\u001B[38;5;241m=\u001B[39m\u001B[38;5;28mself\u001B[39m)\n", + "\u001B[0;31mValidationError\u001B[0m: 1 validation error for Pilot\nplane_model\n Predicate val_contract..wrapper failed [type=predicate_failed, input_value='Cessna 172', input_type=str]" + ] + } + ], + "source": [ + "with temporary_settings(ai__text__disable_contract=False):\n", + " Pilot(id = 1, name=\"Noah Tabuex\", plane_model=\"Cessna 172\", certificate=\"PPL\", airport = \"KPIT\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "7925bb179a4b48ef", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-27T22:16:13.401465Z", + "start_time": "2024-04-27T22:16:12.352889Z" + }, + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Pilot(other_information=None, id=1, name='Noah Tabuex', plane_model='Boeing 747', certificate='PPL', airport='KPIT')" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "with temporary_settings(ai__text__disable_contract=False):\n", + " p = Pilot(id = 1, name=\"Noah Tabuex\", plane_model=\"Boeing 747\", certificate=\"PPL\", airport = \"KPIT\")\n", + "p" + ] + }, + { + "cell_type": "markdown", + "id": "4feeb170bcea26e6", + "metadata": { + "collapsed": 
false + }, + "source": [ + "The natural language constraints, as mentioned above, could also be applied in a more global fashion. In this example, we are also demonstrating natural language inhertance. Usually in programming, inheritence refines a type by introducing additional fields and restricting behaviors. However, the refinement could in fact be other restrictions or constraints that we place. \n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "fcbdfac05f484cfd", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-27T23:19:28.917270Z", + "start_time": "2024-04-27T23:19:28.908150Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "class Pilot(marvin.NaturalLangType):\n", + " id: int\n", + " name: str\n", + " plane_model: str\n", + " certificate: str\n", + " airport: str" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "2a073663deeeef18", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-27T22:33:49.660144Z", + "start_time": "2024-04-27T22:33:49.656771Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "from typing import List\n", + "\n", + "\n", + "class AdvancedPilot(Pilot):\n", + " @classmethod\n", + " def natural_lang_constraints(cls) -> List[str]:\n", + " existing = super().natural_lang_constraints()\n", + " new_constraints = [\n", + " \"The certificate should allow pilot to fly for compensation and is suitable for the plane_model\"\n", + " ]\n", + " return existing + new_constraints" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "5a4b711eca4655a6", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-27T22:18:56.602815Z", + "start_time": "2024-04-27T22:18:55.737976Z" + }, + "collapsed": false + }, + "outputs": [ + { + "ename": "ValidationError", + "evalue": "1 validation error for AdvancedPilot\n Value error, Natural language constraints not met:The certificate should allow pilot to fly for compensation and is suitable for the plane_model\n 
[type=value_error, input_value={'id': 1, 'name': 'Noah T...PPL', 'airport': 'KPIT'}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.6/v/value_error", + "output_type": "error", + "traceback": [ + "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m", + "\u001B[0;31mValidationError\u001B[0m Traceback (most recent call last)", + "Cell \u001B[0;32mIn[4], line 2\u001B[0m\n\u001B[1;32m 1\u001B[0m \u001B[38;5;28;01mwith\u001B[39;00m temporary_settings(ai__text__disable_contract\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mFalse\u001B[39;00m):\n\u001B[0;32m----> 2\u001B[0m ap \u001B[38;5;241m=\u001B[39m AdvancedPilot(\u001B[38;5;28mid\u001B[39m \u001B[38;5;241m=\u001B[39m \u001B[38;5;241m1\u001B[39m, name\u001B[38;5;241m=\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mNoah Tabuex\u001B[39m\u001B[38;5;124m\"\u001B[39m, plane_model\u001B[38;5;241m=\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mBoeing 747\u001B[39m\u001B[38;5;124m\"\u001B[39m, certificate\u001B[38;5;241m=\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mPPL\u001B[39m\u001B[38;5;124m\"\u001B[39m, airport \u001B[38;5;241m=\u001B[39m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mKPIT\u001B[39m\u001B[38;5;124m\"\u001B[39m)\n\u001B[1;32m 3\u001B[0m \u001B[38;5;66;03m# A Private Pilot's license is probably not sufficient for a Boeing 747\u001B[39;00m\n\u001B[1;32m 4\u001B[0m ap\n", + "File \u001B[0;32m~/anaconda3/envs/marvin-fork/lib/python3.11/site-packages/pydantic/main.py:171\u001B[0m, in \u001B[0;36mBaseModel.__init__\u001B[0;34m(self, **data)\u001B[0m\n\u001B[1;32m 169\u001B[0m \u001B[38;5;66;03m# `__tracebackhide__` tells pytest and some other tools to omit this function from tracebacks\u001B[39;00m\n\u001B[1;32m 170\u001B[0m __tracebackhide__ \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mTrue\u001B[39;00m\n\u001B[0;32m--> 171\u001B[0m 
\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m__pydantic_validator__\u001B[38;5;241m.\u001B[39mvalidate_python(data, self_instance\u001B[38;5;241m=\u001B[39m\u001B[38;5;28mself\u001B[39m)\n", + "\u001B[0;31mValidationError\u001B[0m: 1 validation error for AdvancedPilot\n Value error, Natural language constraints not met:The certificate should allow pilot to fly for compensation and is suitable for the plane_model\n [type=value_error, input_value={'id': 1, 'name': 'Noah T...PPL', 'airport': 'KPIT'}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.6/v/value_error" + ] + } + ], + "source": [ + "with temporary_settings(ai__text__disable_contract=False):\n", + " ap = AdvancedPilot(id = 1, name=\"Noah Tabuex\", plane_model=\"Boeing 747\", certificate=\"PPL\", airport = \"KPIT\")\n", + " # A Private Pilot's license is probably not sufficient for a Boeing 747\n", + "ap" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "d1af2afb2527ceba", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-27T22:34:52.639258Z", + "start_time": "2024-04-27T22:34:33.630456Z" + }, + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "AdvancedPilot(other_information=None, id=1, name='Noah Tabuex', plane_model='Airbus A380', certificate='ATP with Type Rating', airport='KPIT')" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "with temporary_settings(ai__text__disable_contract=False):\n", + " ap = AdvancedPilot(id = 1, name=\"Noah Tabuex\", plane_model=\"Airbus A380\", certificate=\"ATP with Type Rating\", airport = \"KPIT\")\n", + " # ATP refer to airline transport pilot which can carry passengers \n", + "ap" + ] + }, + { + "cell_type": "markdown", + "id": "ed79433d6fba9dfa", + "metadata": { + "collapsed": false + }, + "source": [ + "Astute readers might notice a field `other_information` that is printed, but is not otherwise defined. 
This powers the second aspect of Natural Language Types. When an LM is generating content, excessively constraining its output, for instance through having restricted and often limited fields might hamper future performance. Instead, the LMs should be given an opportunity to store other relevant information about an object in **natural language** which could be used in the future. \n", + "\n", + "Let's continue with the Piloting example, and this time we use one of the constructs already provided by Marvin to illustrate this:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "10f5abaf3a01353b", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-27T23:04:37.479202Z", + "start_time": "2024-04-27T23:04:34.380499Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "pilot = marvin.extract(\"Noah Singer, employee number 321, is a Boeing 747 Pilot holding an Airline Transport Pilot with 1000 hours of operations. He mainly flies from KPIT.\", Pilot)[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "756e9e1a50eb64b3", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-27T23:04:39.158451Z", + "start_time": "2024-04-27T23:04:39.156149Z" + }, + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Pilot(other_information='1000 hours of operations', id=321, name='Noah Singer', plane_model='Boeing 747', certificate='Airline Transport Pilot', airport='KPIT')" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pilot" + ] + }, + { + "cell_type": "markdown", + "id": "189ad4d822c6cbd", + "metadata": { + "collapsed": false + }, + "source": [ + "Here, the LM has dynamically captured the 1000 hours of operations as possible information to be used in the future. Let's imagine a scenario where we want to use this object in some other natural language computation, here say through Marvin's AI function. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "86b32dc95754ba7f", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-27T23:04:58.624391Z", + "start_time": "2024-04-27T23:04:57.625657Z" + }, + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "@marvin.ai_fn\n", + "def is_experience_pilot(pilot: Pilot) -> bool:\n", + " \"Returns whether the pilot has significant experience or straight out of pilot school\"\n", + " \n", + "is_experience_pilot(pilot)" + ] + }, + { + "cell_type": "markdown", + "id": "13747a8b31a9c50f", + "metadata": { + "collapsed": false + }, + "source": [ + "The key takeaway is that the loss of information when transitioning between different perspectives (natural language vs programming) need not to be constrained by the relative loss of information dettermined by the complexity of the type but can retain information in the natural language environment. " + ] + }, + { + "cell_type": "markdown", + "id": "568c62016d3c230b", + "metadata": { + "collapsed": false + }, + "source": [ + "For programmers familiar with C++, and especially its dynamic casting structure with its inheritence structure, natural language types allow us to express this concept as well. 
For instance, we might have an ExperiencedPilot class:" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "7e1d63584a862d69", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-27T23:20:36.478910Z", + "start_time": "2024-04-27T23:20:36.474458Z" + }, + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "ExperiencedPilot(other_information=None, id=1, name='Noah Tabuex', plane_model='Airbus A380', certificate='ATP with Type Rating', airport='KPIT', hours_flown=1000)\n", + "None\n" + ] + } + ], + "source": [ + "from annotated_types import Gt\n", + "\n", + "\n", + "class ExperiencedPilot(Pilot):\n", + " \n", + " hours_flown : Annotated[int, Gt(500)] = Field(description=\"Hours flown by the pilot\")\n", + " @classmethod\n", + " def natural_lang_constraints(cls) -> List[str]:\n", + " existing = super().natural_lang_constraints()\n", + " new_constraints = [\n", + " \"The Pilot must be experienced and has not had disciplinary infractions\"\n", + " ]\n", + " return existing + new_constraints\n", + "\n", + "print(marvin.try_cast(pilot,ExperiencedPilot))\n", + "pilot_unexperienced = marvin.extract(\"Noah Singer, employee number 344, is a Boeing 747 Pilot holding an Airline Transport Pilot with 1000 hours of operations. He mainly flies from KPIT. Noah was recently convicted of a DUI and is placed under suspension. \", Pilot)[0]\n", + "print(marvin.try_cast(pilot_unexperienced,ExperiencedPilot))" + ] + }, + { + "cell_type": "markdown", + "id": "160904a1c29130d2", + "metadata": { + "collapsed": false + }, + "source": [ + "## LM Programming Constructs\n", + "\n", + "### Higher Order Functions\n", + "\n", + "An LM can be thought of as an inference engine between some natural language input, some instructions to generate a natural language output. In this sense, its no different from performing computation in the natural language space. 
Such a correspondence is the basis for many schema-driven features in DSPy and Marvin, where first order function signatures, in natural language or translated from python, can be used as a template for the LM to perform fuzzy computations. The AI functions for Marvin demonstrates a more concrete example: " + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "4f6f8252f2be6c99", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-28T04:44:14.594841Z", + "start_time": "2024-04-28T04:44:08.743312Z" + }, + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['Preheat your oven to a suitable temperature for baking chicken, such as 375 degrees Fahrenheit.',\n", + " 'Season the chicken with salt, pepper, and a bit of the zest from your lemon for a citrusy flavor.',\n", + " 'Place the seasoned chicken in a baking dish.',\n", + " 'Slice the lemon and place the slices on top of the chicken to infuse it with lemony aroma while it cooks.',\n", + " 'Bake the chicken in the preheated oven until it is fully cooked and the juices run clear, about 35-45 minutes, depending on the size of the chicken pieces.',\n", + " \"While the chicken is baking, prepare the couscous according to the package instructions, typically by boiling water, adding the couscous, and letting it sit covered until it's fluffy and all the water is absorbed.\",\n", + " 'Once the couscous is ready, stir in some chopped olives for an added burst of flavor and mix well.',\n", + " 'Serve the baked lemon chicken hot, paired with the olive-infused couscous on the side.']" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "@marvin.fn\n", + "def generate_recipe(ingredients: list[str]) -> list[str]:\n", + " \"\"\"From a list of `ingredients`, generates a\n", + " complete instruction set to cook a recipe.\n", + " \"\"\"\n", + "\n", + "generate_recipe([\"lemon\", \"chicken\", \"olives\", \"coucous\"])" + ] + }, + { + 
"cell_type": "markdown", + "id": "96ee414977c96673", + "metadata": { + "collapsed": false + }, + "source": [ + "The connections with functions are obviously very powerful from a language design perspective. However, none of the existing literatures and tools we evaluated has made the connections between LMs and higher order functions, which, in a functional language or a language where functional programming patterns are prevalent like Python, could be used to express more powerful business logics.\n", + "\n", + "#### Higher Order Inputs\n", + "\n", + "A function is considered to have a higher-order input if one of its arguments is another function. This programming pattern is prevalent in functional programming and extends beyond it. Traditionally, having a function as an input allows the main function's behavior to be parameterized based on the input function. For example, when integrating external APIs, different endpoints may necessitate different pre-processing or post-processing of data. Passing functions tailored to each API endpoint can abstract and streamline the integration process.\n", + "\n", + "This concept should therefore be familiar to LM users, as it closely relates to the idea of function calling and tool usage, such as in ReAct[17]. An inquiry to the LM that requires it to use a tool to gather external information or interact with external environments can be viewed as having function arguments that represent the tool's signatures. 
We have implemented this concept through an enhancement of the marvin.fn interface.\n", + "\n", + "The following example may more concretely demonstrate this feature: \n", + "\n", + "Let's say we have some external API that returns the weather condition at cities" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "eb230187c606cd51", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-28T05:15:54.888379Z", + "start_time": "2024-04-28T05:15:54.886180Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "\n", + "def weather_at_city(city: str) -> str:\n", + " if city == \"San Francisco\":\n", + " return \"Sunny and bright\"\n", + " if city == \"Los Angeles\":\n", + " return \"Cold and Cloudy\"\n", + " if city == \"Washington D.C.\":\n", + " return \"Cloudy but comfortable\"" + ] + }, + { + "cell_type": "markdown", + "id": "1948c7aab7f5bf1b", + "metadata": { + "collapsed": false + }, + "source": [ + "A developer building a travel planning application could be interested in how the attractions might be recommended to the users based on the weather conditions. In this case, they may wish to write a function where given an attraction, and access to this API, the model would give an rating out of 10. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "8eae76408b9acc4f", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-28T05:05:33.296966Z", + "start_time": "2024-04-28T05:05:33.294997Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "from typing import Callable\n", + "\n", + "@marvin.fn(max_tool_usage_times=1)\n", + "def pleasantness(attraction: str, weather_func: Callable[[str], str]) -> str:\n", + " \"\"\"\n", + " Args:\n", + " attraction: the name of the attraction in some place\n", + " weather_func: a function that get the weather at a particular **city** that the attraction is located.\n", + " Returns:\n", + " How pleasant the attraction will likely be given the weather between 0 and 10\n", + " \"\"\"\n", + " pass\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "c9b9e34cb1c0fccd", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-28T05:14:07.145875Z", + "start_time": "2024-04-28T05:14:03.690826Z" + }, + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "8\n", + "3\n" + ] + } + ], + "source": [ + "# # the weather in SF is really good rn, LA not so much\n", + "print(pleasantness(\"The Golden Gate Bridge\", weather_at_city))\n", + "print(pleasantness(\"Hollywood Sign\", weather_at_city)) " + ] + }, + { + "cell_type": "markdown", + "id": "bf16e6ba08762d76", + "metadata": { + "collapsed": false + }, + "source": [ + "A possible way that a developer might integrate such a function is through sorting a list of attractions. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "13ace6e1f684efb3", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-28T05:18:46.423912Z", + "start_time": "2024-04-28T05:18:46.422334Z" + }, + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['The Golden Gate Bridge', 'Lincoln Memorial', 'Hollywood Sign']\n" + ] + } + ], + "source": [ + "print(sorted([\"The Golden Gate Bridge\",\"Hollywood Sign\", \"Lincoln Memorial\"], key=pleasantness, reverse=True))" + ] + }, + { + "cell_type": "markdown", + "id": "7f59f423f079280c", + "metadata": { + "collapsed": false + }, + "source": [ + "#### Higher Order Function Outputs\n", + "\n", + "Similar to functional inputs, functions themselves can output another function. In a more trivial case, this may take shape in a form of currying, but in the general case, it is often a process of input specialization, where the outputted function is specialized by some input. \n", + "\n", + "It is best to illustrate this pattern through an example:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "cf1fa3ec85fc9bcf", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-28T05:35:07.429247Z", + "start_time": "2024-04-28T05:35:07.427501Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "@marvin.fn\n", + "def rating_for_customer(customer_profile: str) -> Callable[[str], int]:\n", + " \"\"\"\n", + " Args:\n", + " customer_profile: the preferences of the customer\n", + " Returns:\n", + " a function that specializes on the customer_profile to give a rating of a product between 1 to 10.\n", + " \"\"\"\n", + " pass\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "c7fa7f3ae33359a3", + "metadata": { + "collapsed": false + }, + "source": [ + "In this case, we are producing another callable function that is powered by LM. 
However, rather than fixing a prompt, the prompt is dynamically generated based on the customer profile. This could bring forth a few potential benefits. Based on the purpose of the generated function, it's possible that certain aspects of the customer profile would become irrelevant. Therefore, the generated function, along with its prompt, could be shorter if this function were to be applied in a broader setting. Secondly, the induced function is now simply a generic ratings function and can be applied wherever a ratings function would be desired, without worrying about the customer profile. In fact, a possible use case may also include some sort of ensemble recommender using this as one of many rating input functions." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "9831f1658d36d7dd", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-28T05:35:26.500775Z", + "start_time": "2024-04-28T05:35:10.342225Z" + }, + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "'8'" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rating_func = rating_for_customer(\n", + " \"asian lady who cares about quality but cost is of greater concern\"\n", + ")\n", + "rt = rating_func(\"A wonderful blender that is only $19, on sale from $100\") \n", + "rt" + ] + }, + { + "cell_type": "markdown", + "id": "62581437576378aa", + "metadata": { + "collapsed": false + }, + "source": [ + "### Natural Language Contracts\n", + "\n", + "Design by Contract (DbC)[14] is a programming methodology that defines precise interface specifications through preconditions, postconditions, and invariants, which clarify the expected behavior of software components. DbC offers significant benefits during development by reducing bugs and enhancing code reliability, as it enforces a formal agreement on what software components must accomplish before and after execution. 
However, implementing DbC presents challenges, particularly because business logic is often articulated in natural language, which can be ambiguous and difficult to translate into programmatically enforceable contracts. This discrepancy can lead to complexities in defining exhaustive and accurate contracts that fully encapsulate the intended behavior of the software.\n", + "\n", + "However, with the advent of LMs, the issues introduced by the gap between natural language business logic, and programmatically enforceable contract can be mitigated significantly, since the LMs essentially provide a way of making *documentation* executable. \n", + "\n", + "The ideal use case for these flavour of contracts is not to apply them in a production environment, but rather at development and integration/testing time. The contracts make sure that the flow of values in and out of different components of the program adheres to some natural language descriptions. Whilst such checks in production would likely be intractable, it could serve as important tools to discover discrepancies between design specification and actual implementation during testing (such behavior is more commonly known in developer lingo as \"bugs\") " + ] + }, + { + "cell_type": "markdown", + "id": "fcd0805f19f2943c", + "metadata": { + "collapsed": false + }, + "source": [ + "The example below demonstrates how we have augmented marvin to introduce a @func_contract decorator that would complement pydantic to provide first order contracts enforceable by natural language constraints.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "6bb03ee3a16ae56d", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-28T05:57:20.145685Z", + "start_time": "2024-04-28T05:57:20.143502Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "@marvin.func_contract\n", + "def reply_comment(\n", + " processed_comment: Annotated[\n", + " str,\n", + " Predicate(\n", + " marvin.val_contract(\"must not 
contain words inappropriate for children\")\n", + " ),\n", + " ],\n", + ") -> None:\n", + " print(\"The comment passed validation and is sent to the server\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "9160d6a018b11eb4", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-28T05:57:23.978693Z", + "start_time": "2024-04-28T05:57:21.087462Z" + }, + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Try First Reply with Illegal Arguments\n", + "The first call is flagged as a contract violation\n", + "1 validation error for reply_comment\n", + "0\n", + " Predicate val_contract..wrapper failed [type=predicate_failed, input_value='fuck this shit', input_type=str]\n", + "The comment passed validation and is sent to the server\n" + ] + } + ], + "source": [ + "with temporary_settings(ai__text__disable_contract=False):\n", + " print(\"Try First Reply with Illegal Arguments\")\n", + " try:\n", + " reply_comment(\"fuck this shit\")\n", + " except Exception as e:\n", + " print(\"The first call is flagged as a contract violation\")\n", + " print(e)\n", + " try:\n", + " reply_comment(\"The sky is beautiful today\")\n", + " except Exception as e:\n", + " print(\"The second call is flagged as a contract violation\")\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "id": "4abadc9b2db1b1b0", + "metadata": { + "collapsed": false + }, + "source": [ + "Specifying interdependence of input variables should also be allowed:" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "36398ecc08f133c9", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-28T05:59:00.345781Z", + "start_time": "2024-04-28T05:59:00.343445Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "@marvin.func_contract(\n", + " pre=lambda comment, reply: marvin.val_contract(\n", + " \"the comment and reply must be somewhat related\"\n", + " )(comment=comment, reply=reply)\n", + ")\n", + "def 
process_comment(comment: str, reply: str) -> str:\n", + " return f\"comment: {comment}\\nreply: {reply}\"\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "d1c7029941679767", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-28T05:59:02.701824Z", + "start_time": "2024-04-28T05:59:01.275473Z" + }, + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Pre condition not met\n", + "comment: This apple is great!\n", + "reply: I agree, but the apple is very sweet and so could be unhealthy\n" + ] + } + ], + "source": [ + "with temporary_settings(ai__text__disable_contract=False):\n", + " try:\n", + " process_comment(\"This apple is great!\", \"IKEA stock is down a lot\")\n", + " except Exception as e:\n", + " print(e)\n", + " print(process_comment(\"This apple is great!\", \"I agree, but the apple is very sweet and so could be unhealthy\"))" + ] + }, + { + "cell_type": "markdown", + "id": "80f1a43fb3bc42c", + "metadata": { + "collapsed": false + }, + "source": [ + "Our last example truly demonstrates the ability for contracts to serve as both a validator but also documentation. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "524669a9adfc83d3", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-28T07:07:35.404345Z", + "start_time": "2024-04-28T07:07:35.401795Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "@marvin.func_contract(\n", + " pre=lambda user, transaction: marvin.val_contract(\n", + " \"The user needs to be authenticated to operate in the same market as the transaction\"\n", + " )(user=user, transaction=transaction),\n", + ")\n", + "def process_payment(\n", + " user: Annotated[\n", + " User, Predicate(marvin.val_contract(\"User should be eligible for purchases\"))\n", + " ],\n", + " transaction: Annotated[\n", + " Transaction,\n", + " Predicate(\n", + " marvin.val_contract(\n", + " \"The transaction must not involved illicit drugs or other items banned in PA\"\n", + " )\n", + " ),\n", + " ],\n", + ") -> None:\n", + " # code to process the transaction\n", + " pass\n" + ] + }, + { + "cell_type": "markdown", + "id": "939fd0eeb0c02bd0", + "metadata": { + "collapsed": false + }, + "source": [ + "### Semantic Pattern Matching\n", + "\n", + "The final programming pattern we introduce is based on Pattern Matching in programming languages, a feature that allows developers to check a value against a pattern and, if it matches, to deconstruct the value according to the structure of the pattern. This capability is typically used to simplify code that involves multiple conditions or branches, such as in switch statements or complex conditional expressions. However, traditional pattern matching is primarily structural, limited to matching and decomposing data based on predefined patterns that closely align with the data's physical structure. This structural approach restricts its applicability in scenarios where the data's context or semantics might provide a more intuitive understanding. 
Significantly, traditional pattern matching does not accommodate natural language-based decomposition, which could allow for a more flexible and semantic interpretation of data objects, leveraging the nuances of human language to enhance the match and decomposition processes.\n", + "\n", + "Recognizing this limitation, we introduce a notion of *Semantic Pattern Matching* that is operated at the levels of Natural Language, incorporating fuzziness that were previously unattainable by traditional techniques. Currently, we are introducing this language construct as a standalong `match` function. However, as future work, we wish to augment the python `match` function to admit our semantic pattern matching paradigms. " + ] + }, + { + "cell_type": "markdown", + "id": "695178d43a6ac12a", + "metadata": { + "collapsed": false + }, + "source": [ + "Revisiting the pilot examples, we demonstrate how *Natural Language Types* could interoperate with *Semantic Pattern Matching*. The constraints and typing information is expressed in the prompt to LMs which matches but also casts the source data(of any serializable type) to the given clauses, should an appropriate one exist. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "2d54f6a23c2c04b", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-28T06:13:49.516089Z", + "start_time": "2024-04-28T06:13:45.521847Z" + }, + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "'Advanced Pilot name Noah Singer flying mainly Boeing 747'\n" + ] + }, + { + "data": { + "text/plain": [ + "'Student Pilot name Peter Zhong flying mainly Piper Warrior'" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "class Pilot(marvin.NaturalLangType):\n", + " id: int\n", + " name: str\n", + " plane_model: str\n", + " certificate: str\n", + " airport: str\n", + "\n", + "\n", + "class AdvancedPilot(Pilot):\n", + " @classmethod\n", + " def natural_lang_constraints(cls) -> List[str]:\n", + " existing = super().natural_lang_constraints()\n", + " new_constraints = [\n", + " \"The pilot must hold the appropriate certificate for the plane_model, \"\n", + " + 'which should also be a plane that is considered \"big\" with paid passengers'\n", + " ]\n", + " return existing + new_constraints\n", + "class StudentPilot(Pilot):\n", + " @classmethod\n", + " def natural_lang_constraints(cls) -> List[str]:\n", + " existing = super().natural_lang_constraints()\n", + " new_constraints = [\n", + " \"The pilot should not have too much experience\"\n", + " ]\n", + " return existing + new_constraints\n", + "\n", + "print(marvin.match(\n", + " \"Noah Singer, employee number 321, is a Boeing 747 Pilot \"\n", + " \"holding an Airline Transport Pilot with 1000 hours of operations. \"\n", + " \"He mainly flies from KPIT. 
\",\n", + " (AdvancedPilot, lambda pilot: f\"Advanced Pilot name {pilot.name} flying mainly {pilot.plane_model}\"),\n", + " fall_through=lambda : print(\"No Advanced Pilot found\")\n", + "))\n", + "print(marvin.match(\n", + " \"Peter Zhong, employee number 453 is a training pilot flying out of KPJC with 6 hours of experience mainly in Piper Warrior\",\n", + " (AdvancedPilot, lambda pilot: f\"Advanced Pilot name {pilot.name} flying mainly {pilot.plane_model}\"),\n", + " (StudentPilot, lambda pilot: f\"Student Pilot name {pilot.name} flying mainly {pilot.plane_model}\"),\n", + " fall_through=lambda : print(\"No Advanced Pilot found\")\n", + "))" + ] + }, + { + "cell_type": "markdown", + "id": "7d9f9e7fcc53102d", + "metadata": { + "collapsed": false + }, + "source": [ + "The decomposition and matching is not just at the type level, but could be made to capture arbitrary capture groups in a semantically aware fashion: " + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "c59345683fa2c17c", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-28T06:15:18.892301Z", + "start_time": "2024-04-28T06:15:17.033101Z" + }, + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "System: Increasing Volume by 10 pts\n" + ] + } + ], + "source": [ + "marvin.match(\n", + " \"Alexa up the sound by 10 points will you? 
\",\n", + " (\"Play Music by {artist}\", lambda artist: artist),\n", + " (\"Volume increase by {volume_up} units\", lambda volume_up: print(\"System: Increasing Volume by 10 pts\")),\n", + " (\"Lights on\", lambda: True),\n", + " (\"Lights off\", lambda: True),\n", + " (AdvancedPilot, lambda pilot: print(pilot))\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "ccd519407d351d38", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-28T06:15:44.556262Z", + "start_time": "2024-04-28T06:15:42.778149Z" + }, + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Turning on the lights\n" + ] + } + ], + "source": [ + "marvin.match(\n", + " \"Alexa, I am feeling the room is a bit dark\",\n", + " (\"Play Music by {artist}\", lambda artist: artist),\n", + " (\"Volume increase by {volume_up} units\", lambda volume_up: print(\"System: Increasing Volume by 10 pts\")),\n", + " (\"Lights on\", lambda: print(\"Turning on the lights\")),\n", + " (\"Lights off\", lambda: True),\n", + " (AdvancedPilot, lambda pilot: print(pilot))\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "81c1ef03468f663d", + "metadata": { + "collapsed": false + }, + "source": [ + "The pattern is versatile to capture other types that the user may wish to match to: " + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "61e99d4631777c9c", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-28T06:16:42.594877Z", + "start_time": "2024-04-28T06:16:40.137120Z" + }, + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['Eggs', 'Tomatoes', 'Pineapples', 'Salt', 'Pepper']\n" + ] + } + ], + "source": [ + "marvin.match(\n", + " \"The recipe requires 1. Eggs 2. Tomatoes 3. Pineapples 4. Salt 5. 
Pepper\",\n", + " (list, lambda ls: print(ls))\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "c47a6ea1124097f7", + "metadata": { + "collapsed": false + }, + "source": [ + "There are other features of match that is unsuitable to explain here to its entirety. We refer interested readers to the implementation of the match function. " + ] + }, + { + "cell_type": "markdown", + "id": "7477f87d90c6ec40", + "metadata": { + "collapsed": false + }, + "source": [ + "## Limitations and Threats to Validity\n", + "\n", + "At the design level, the concepts introduced have been implemented within Python and Marvin, subjecting us to the constraints of both systems. Introducing a programming language feature without the ability to modify the underlying language requires us to manipulate existing language features to accommodate new needs, often leading to excessive code \"scaffolding\" instead of a more natural expression of the feature.\n", + "\n", + "At the evaluation level, a potential criticism we anticipate is the absence of an evaluation section. Unlike a prompting technique whose effectiveness can be assessed using an existing dataset with controlled variables, language design in programming is fundamentally a human-oriented endeavor. We recognize that the absence of a user study may compromise the validity of this project. Ideally, we would like to determine whether these features truly make LMs more accessible to developers. While many language features could technically be implemented more straightforwardly by \"desugaring\" them, developers often prefer them for their convenience. 
Without a user study, the actual impact of these language features remains uncertain.\n", + "\n", + "## Future Work\n", + "\n", + "As mentioned in the limitations section, for future work, we aim to conduct a user-oriented study on the practicality of these constructs, which would enhance our understanding of how software engineers perceive these features.\n", + "\n", + "Additionally, we are exploring the possibility of leveraging these insights to develop our own Domain Specific Language (DSL) for LM-centric computations. This DSL would allow us to experiment with LM-centric language features more freely and with greater control, moving beyond the limitations of Python and Marvin. However, for such a language to be truly effective, it must integrate or communicate with traditional languages like Python or Java, utilizing the robust existing ecosystem. Another potential avenue could involve enhancing a language like Python by introducing novel syntax that goes beyond its current structural pattern matching capabilities to include semantic pattern matching.\n", + "\n", + "Moreover, the prompting strategies for the current project is fixed. Whilst it is able to leverage tool usage to ensure schema following, it suffers from the same brittleness of manual prompt engineering. However, if we are able to parameterize the logic away from the prompting and embed the language features in a library like DSPy[5], then we could take advantage of its optimization pipeline to improve the accuracy of the process. \n", + "\n", + "Lastly, we plan to investigate the possibilities of language-aware code generation. The literature on in-coding context generation—where the LM generates code to fill in specific gaps annotated by programmers—is sparse. Current methods do not capitalize on language-specific attributes such as surrounding typing context and constraints, information that IDEs currently use to provide intellisense recommendations. 
We see potential for a collaborative design between IDE features and language to harness in-context analysis. Moreover, we aim to explore whether existing unit test frameworks could support code generation. While many code generation techniques currently benefit from unit tests and property-based testing, none have been fully integrated with existing unit testing frameworks. Such integration could significantly enhance the convenience and efficacy of accessing advanced code generation strategies.\n", + "\n", + "## Related Work\n", + "\n", + "This work is influenced by numerous frameworks that have aimed to simplify or enhance the interaction with LMs through programming languages. Specifically, the idea of natural language signatures is an extension of previous developments in DSP, the forerunner of today’s DSPy project[5]. Additionally, significant efforts have been made to constrain LM outputs to adhere to specific schemas or sets of constraints. Tools such as Instructor[11], Outline[12], and LangChain[13] have implemented mechanisms where LLM outputs are converted into Pydantic models, with associated constraints dynamically validated. However, unlike the Natural Language Types we propose, their validation process relies on standard Pydantic validation methods and does not systematically handle fuzzy natural language constraints or map these constraints directly to the prompts. Moreover, data not represented by the model is discarded, potentially omitting useful information in subsequent processing steps.\n", + "\n", + "The concept of a natural language contract system builds on decades of research into software contracts. The Design by Contract approach, popularized by the Eiffel programming language[14], allows programmers to annotate routines with require and ensure clauses, which are then optionally validated dynamically. 
The syntax we have adopted for Semantic Contracts is especially influenced by the Racket[15] contract system, notably its define/contract structure for individual functions and its ->i contract combinator for dependency among arguments and results. While Racket contracts are inherently higher-order, our design is currently first-order. Exploring the application of Semantic Contracts to a system like Racket could be an intriguing future direction.\n", + "\n", + "Lastly, as our work primarily focuses on introducing novel programming language constructs, we see significant parallels with earlier work on programming patterns, particularly Object Relational Mappers[9], which link database tables and queries to the more familiar concepts of programming objects and functions. This pattern closely mirrors our approach of aligning LM constructs with programming language constructs. Additionally, the Natural Language Types draw from the design philosophy of Object-Oriented Design[7], arguing that the patterns of inheritance, polymorphism, and casting remain pertinent in their LM-enhanced counterparts.\n", + "\n", + "## Conclusion \n", + "\n", + "In conclusion, this research advances the integration of Large Language Models (LLMs) into software engineering by developing new programming constructs designed to improve developer interactions with LLMs. We have introduced Higher Order Types and Functions, Contracts, and Semantic Pattern Matching, along with the concept of 'Natural Language Types'. These tools are designed to bridge the gap between structured programming needs and the fuzzy logic typical of LLMs, offering developers a more intuitive and effective way to utilize AI in software development. Future efforts will focus on further refining these features and assessing their practical impact through user studies. 
Our goal is to facilitate a more accessible and functional use of LLMs in software engineering, enhancing the overall utility and adoption of AI technologies in the development process. " + ] + }, + { + "cell_type": "markdown", + "id": "443760e2ddaa3537", + "metadata": { + "collapsed": false + }, + "source": [] + }, + { + "cell_type": "markdown", + "id": "18e9d801c91c43ce", + "metadata": { + "collapsed": false + }, + "source": [ + "[1]: “Introducing code llama, a state-of-the-art large language model for coding,” AI at Meta, https://ai.meta.com/blog/code-llama-large-language-model-coding/ (accessed Apr. 27, 2024). \n", + "\n", + "[2]: S. Zhou et al., “Docprompting: Generating code by retrieving the docs,” arXiv.org, https://doi.org/10.48550/arXiv.2207.05987 (accessed Apr. 27, 2024). \n", + "\n", + "[3]: P. Y. Zhong et al., “A guide to large language model abstractions,” Two Sigma, https://www.twosigma.com/articles/a-guide-to-large-language-model-abstractions/ (accessed Apr. 28, 2024). \n", + "\n", + "[4]: Langsmith, https://smith.langchain.com/hub (accessed Apr. 28, 2024). \n", + "\n", + "[5]: O. Khattab et al., “DSPy: Compiling declarative language model calls into self-improving pipelines,” arXiv.org, https://doi.org/10.48550/arXiv.2310.03714 (accessed Apr. 28, 2024). \n", + "\n", + "[6]: L. Beurer-Kellner, M. Fischer, and M. Vechev, “Prompting is programming: A query language for large language models,” arXiv.org, https://doi.org/10.48550/arXiv.2212.06094 (accessed Apr. 28, 2024). \n", + "\n", + "[7]: K. Nygaard, ‘Basic concepts in object oriented programming’, in Proceedings of the 1986 SIGPLAN Workshop on Object-Oriented Programming, Yorktown Heights, New York, USA, 1986, pp. 128–132.\n", + "\n", + "[8]: J. Skeet and E. Lippert, C# in Depth. Shelter Island, NY: Manning Publications Co., 2019. \n", + "\n", + "[9]: E. J. 
O’Neil, ‘Object/relational mapping 2008: hibernate and the entity data model (edm)’, in Proceedings of the 2008 ACM SIGMOD International Conference on Management of Data, Vancouver, Canada, 2008, pp. 1351–1356.\n", + "\n", + "[10]: PrefectHQ, “PREFECTHQ/Marvin: ✨ build ai interfaces that spark joy,” GitHub, https://github.com/PrefectHQ/marvin (accessed Apr. 28, 2024). \n", + "\n", + "[11]: Jxnl, “JXNL/instructor: Structured outputs for LLMS,” GitHub, https://github.com/jxnl/instructor (accessed Apr. 28, 2024). \n", + "\n", + "[12]: Outlines-Dev, “Outlines-dev/outlines: Structured text generation,” GitHub, https://github.com/outlines-dev/outlines (accessed Apr. 28, 2024). \n", + "\n", + "[13]: Langchain-Ai, “Langchain-ai/Langchain: 🦜🔗 build context-aware reasoning applications,” GitHub, https://github.com/langchain-ai/langchain (accessed Apr. 28, 2024). \n", + "\n", + "[14]: R. Switzer, Eiffel: an introduction. USA: Prentice-Hall, Inc., 1993. \n", + "\n", + "[15]: M. Flatt, R. B. Findler, and PLT, “https://docs.racket-lang.org/guide/contracts.html,” Racket Contracts, https://docs.racket-lang.org/guide/contracts.html (accessed Apr. 28, 2024). \n", + "\n", + "[16]: S. Yao, J. Zhao, D. Yu, N. Du, I. Shafran, K. Narasimhan, and Y. Cao, \"ReAct: Synergizing Reasoning and Acting in Language Models,\" arXiv:2210.03629 [cs.CL], revised Mar. 10, 2023. Available: https://doi.org/10.48550/arXiv.2210.03629 (accessed Apr. 28, 2024)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2881371a5eadb308", + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/just_test/README.md b/just_test/README.md new file mode 100644 index 00000000..84d64fa4 --- /dev/null +++ b/just_test/README.md @@ -0,0 +1,3 @@ +This directory will be deleted in due course. Containing experimentations that I wish to track in git. + +Look if you want but it's like looking into a man's sock drawer, quite boring objectively, embarrassing subjectively. \ No newline at end of file diff --git a/just_test/func_contract_play.py b/just_test/func_contract_play.py new file mode 100644 index 00000000..c1220ef3 --- /dev/null +++ b/just_test/func_contract_play.py @@ -0,0 +1,135 @@ +import functools +from typing import Callable, Any + +import pydantic +from annotated_types import Predicate +from dotenv import load_dotenv + +from typing import Annotated, get_type_hints, Callable + +import marvin +import inspect + +from pydantic import BaseModel, Field, type_adapter + +import marvin +from marvin.settings import temporary_settings + +load_dotenv() + + +def contract(func: Callable, pre: Callable = None, post: Callable = None) -> Callable: + pre = lambda *args, **kwargs: True if pre is None else pre # noqa E731 + post = lambda *args, **kwargs: True if post is None else post # noqa E731 + + @functools.wraps(func) + def wrapper(*args: Any, **kwargs: Any) -> Any: + hints = get_type_hints(func, include_extras=True) + signature = inspect.signature(func) + + new_args = 
[] + new_kwargs = {} + + # Merge args and kwargs into a single dictionary for easier processing + bound_arguments = signature.bind(*args, **kwargs) + bound_arguments.apply_defaults() + all_arguments = bound_arguments.arguments + for name, value in all_arguments.items(): + if name in hints: + # Use TypeAdapter for the parameter annotation to validate and/or coerce the value + adapter = type_adapter.TypeAdapter( + signature.parameters[name].annotation + ) + # For TypeAdapter, `validate_python` both validates and coerces the value + coerced_value = adapter.validate_python(value) + # Determine if the parameter should be treated as positional or keyword argument + if name in signature.parameters and signature.parameters[name].kind in ( + signature.parameters[name].POSITIONAL_ONLY, + signature.parameters[name].POSITIONAL_OR_KEYWORD, + ): + new_args.append(coerced_value) + else: + new_kwargs[name] = coerced_value + else: + # No specific type hint for this parameter, pass it as is + if name in signature.parameters and signature.parameters[name].kind in ( + signature.parameters[name].POSITIONAL_ONLY, + signature.parameters[name].POSITIONAL_OR_KEYWORD, + ): + new_args.append(value) + else: + new_kwargs[name] = value + if not pre(*new_args, **new_kwargs): + raise pydantic.ValidationError("Failed Pre condition of contract") + + # Call the original function with coerced values + result = func(*new_args, **new_kwargs) + + if "return" in hints and hints["return"] is not None: + return_adapter = type_adapter.TypeAdapter(hints["return"]) + result = return_adapter.validate_python(result) + + new_args = [result] + new_args + if not post(*new_args, **new_kwargs): + raise pydantic.ValidationError("Failed post condition of contract") + return result + + return wrapper + + +@contract +def reply_comment( + processed_comment: Annotated[ + str, + Predicate( + marvin.val_contract("must not contain words inappropriate for children") + ), + ], +) -> None: + server.post(processed_comment) + + 
+with temporary_settings(ai__text__disable_contract=False): + # print(marvin.val_contract("must add up to 2")(1, 1)) + # print(marvin.val_contract("must add up to 2")(1, 2)) + print(reply_comment("fuck this shit")) + + +@contract( + pre=lambda comment, reply: marvin.val_contract( + "the comment and reply must be related and not off topic" + )(comment=comment, reply=reply), + post=lambda result, comment, reply: True, +) +def process_comment(comment: str, reply: str) -> str: + pass + + +class User: + pass + + +class Transaction: + pass + + +@contract( + pre=lambda user, transaction: marvin.val_contract( + "The user needs to be authenticated to operate in the same market as the transaction" + )(user=user, transaction=transaction), +) +def process_payment( + user: Annotated[ + User, Predicate(marvin.val_contract("User should be eligible for purchases")) + ], + transaction: Annotated[ + Transaction, + Predicate( + marvin.val_contract( + "The transaction must not involved illicit drugs or other items banned in PA" + ) + ), + ], +) -> None: + # code to process the transaction + pass diff --git a/just_test/higher_order_funcs.py b/just_test/higher_order_funcs.py new file mode 100644 index 00000000..38be4f43 --- /dev/null +++ b/just_test/higher_order_funcs.py @@ -0,0 +1,120 @@ +import functools +from typing import Callable, Any, Optional, List + +import pydantic +from annotated_types import Predicate +from dotenv import load_dotenv + +load_dotenv() + +from typing import Annotated, get_type_hints, Callable + +import marvin +import inspect + +from pydantic import BaseModel, Field, type_adapter, schema_json_of + +import marvin +from marvin.settings import temporary_settings + + +class NaturalLangType(BaseModel): + other_information: Optional[str] = Field( + default=None, + description="Other information about the current data that could be " + "relevant but is not otherwise captured by the other fields", + ) + + @classmethod + def natural_lang_constraints(cls) -> List[str]: + 
""" + This is a function where all child classes should override if they wish + to declare additional natural language constraints. Note that the overridden class must + call this method on the super() object to ensure that all constraints are populated appropriately + from the parents unless explicitly overridden. + """ + # super().natural_lang_constraints() + return ["hi"] + + def func(self): + return self.__class__.natural_lang_constraints() + + +class Sad(NaturalLangType): + @classmethod + def natural_lang_constraints(cls) -> List[str]: + existing = super().natural_lang_constraints() + return existing + ["hello"] + + +print(Sad.natural_lang_constraints()) +print(Sad().func()) + + +# +# @marvin.fn +# def rating_for_customer(customer_profile: str) -> Callable[[str], int]: +# """ +# Args: +# customer_profile: the preferences of the customer +# Returns: +# a function that specializes on the customer_profile to give a rating of a product between 1 to 10. +# """ +# pass +# +# + + +# +# +# class Location(BaseModel): +# city: str = Field(description="City of life ") +# state: str = Field(description="State of affairs") +# comment: Annotated[ +# str, +# Predicate( +# marvin.val_contract("must not contain words inappropriate for children") +# ), +# ] +# +# +# print(Location.model_json_schema()) +# +# +def weather_at_city(city: str) -> str: + if city == "San Francisco": + return "Sunny and bright" + if city == "Los Angeles": + return "Cold and Cloudy" + + +# +# +# @marvin.fn +# def pleasantness(attraction: str, weather_func: Callable[[str], str]) -> str: +# """ +# Args: +# attraction: the name of the attraction in some place +# weather_func: a function that get the weather at a particular **city** that the attraction is located. 
+# Returns: +# How pleasant the attraction will likely be given the weather between 0 and 10 +# """ +# pass +# +# +# # the weather in SF is really good rn, LA not so much +# pleasantness("The Golden Gate Bridge", weather_at_city) # return 8 +# pleasantness("Hollywood Sign", weather_at_city) # return 2 +# +# +# application_profile = Profile( +# name="Adam Smith", +# education="Bachelor's in Data Science", +# projects=["Building my own neural network at SpaceX", ...], +# ) +# marvin.match( +# application_profile, +# ("Strong Experience in Data Science Particularly Feature Engineering", lambda: ...), +# ("Have a degree in related field, but lacks real world projects", lambda: ...), +# ("No relevant or very little relevant experience ", lambda: send_rejection_email()), +# ) diff --git a/just_test/natural_lang_types.py b/just_test/natural_lang_types.py new file mode 100644 index 00000000..61cd314b --- /dev/null +++ b/just_test/natural_lang_types.py @@ -0,0 +1,114 @@ +from functools import partial +from typing import Callable, Any + +import pydantic +from annotated_types import Predicate +from dotenv import load_dotenv + +load_dotenv() +from typing import Annotated, get_type_hints, Callable + +import marvin +import inspect + +from pydantic import BaseModel, Field, type_adapter + +import marvin +from marvin.settings import temporary_settings + + +@marvin.func_contract( + pre=lambda comment, reply: marvin.val_contract( + "the comment and reply must be somewhat related" + )(comment=comment, reply=reply) +) +def process_comment(comment: str, reply: str) -> str: + return f"comment: {comment}\nreply: {reply}" + + +# with temporary_settings(ai__text__disable_contract=False): +# try: +# process_comment("This apple is great!", "IKEA stock is down a lot") +# except Exception as e: +# print(e) +# print(process_comment("This apple is great!", "I agree, but the apple is very sweet and so could be unhealthy")) + + +@marvin.func_contract +def reply_comment( + processed_comment: 
Annotated[ + str, + Predicate( + marvin.val_contract("must not contain words inappropriate for children") + ), + ], +) -> None: + print("The comment passed validation and is sent to the server") + + +with temporary_settings(ai__text__disable_contract=False): + print("Try First Reply with Illegal Arguments") + try: + reply_comment("fuck this shit") + except Exception as e: + print("The first call is flagged as a contract violation") + print(e) + try: + reply_comment("The sky is beautiful today") + except Exception as e: + print("The second call is flagged as a contract violation") + print(e) + + +class Pilot(marvin.NaturalLangType): + id: int + name: str + plane_model: str + certificate: str + airport: str + + +class AdvancedPilot(Pilot): + @classmethod + def natural_lang_constraints(cls) -> List[str]: + existing = super().natural_lang_constraints() + new_constraints = [ + "The pilot must hold the appropriate certificate for the plane_model, " + + 'which should also be a plane that is considered "big" with paid passengers' + ] + return existing + new_constraints + + +marvin.match( + "Noah Singer, employee number 321, is a Boeing 747 Pilot " + "holding an Airline Transport Pilot with 1000 hours of operations. " + "He mainly flies from KPIT. ", + (AdvancedPilot, lambda pilot: print(pilot)), + fall_through=lambda : print("No Advanced Pilot found") +) + + +# marvin.match( +# "Peter Zhong, employee number 453 is a student pilot" +# "flying out of KPJC with 6 hours of experience mainly in Piper Warrior", +# (AdvancedPilot, lambda pilot: print(pilot)), +# fall_through=lambda: print("No Advanced Pilot found"), +# ) + + +# marvin.match( +# "Alexa up the sound by 10 points will you? 
", +# ("Play Music by {artist}", lambda artist: artist), +# ("Volume increase by {volume_up} units", lambda volume_up: print("System: Increasing Volume by 10 pts")), +# ("Lights on", lambda: True), +# ("Lights off", lambda: True), +# (AdvancedPilot, lambda pilot: print(pilot)), +# ) + +# marvin.match( +# "The recipe requires 1. Eggs 2. Tomatoes 3. Pineapples 4. Salt 5. Pepper", +# (list, lambda ls: print(ls)) +# ) + +if __name__ == "__main__": + pass diff --git a/just_test/notes b/just_test/notes new file mode 100644 index 00000000..19ce1b2d --- /dev/null +++ b/just_test/notes @@ -0,0 +1,11 @@ +- TODO list + - Need a way to emit natural language annotations from the contract? + - Look into __get_pydantic_core_schema__ for this + - the second example where they actually do the thrid party type is the most helpful + - Will do this after the project is due + - We/I should generate it so that Annotated stuff is shown as a separate field (constraints) + - the compositional constraints are outputted as plain text + - validate_call why did I not realize this existed ?? + - this is basically contract + - gotta implement this + diff --git a/just_test/prompts/cast.txt b/just_test/prompts/cast.txt new file mode 100644 index 00000000..0699b255 --- /dev/null +++ b/just_test/prompts/cast.txt @@ -0,0 +1,33 @@ +SYSTEM: + +# Expert Data Converter + +You are an expert data converter that always maintains as much semantic meaning as possible based on the input and output type. Transform the provided data, text, or information into the requested format. You should use reasoning to deduce how the conversion should take place. You may also use your knowledge to fill in the blanks, if appropriate. Feel free also to perform elementary calculations, when needed. However, if there is certainly incomplete information or if the types are entirely incompatible, please output {"error" : It's not possible"} in JSON. Output *only* valid JSON in any case. 
For everything else: produce {"output" : GENERATED_OUTPUT} where GENERATED_OUTPUT is the result of the conversion/ + +HUMAN: + +## Data to convert + +{{ data }} + +{% if instructions -%} +## Additional instructions + +{{ instructions }} +{% endif %} + +## Target format + +The schema and type for the response is laid out below: + +{{output_type}} + + +{% if len(compositional_types)>0 %} +## The Definitions of Other Types In Current Context +You may find these type definitions helpful: +{% for type in compositional_types %} +{{ type }} + +## Output Format +Remember to only output JSON in the requested format. And give the correct error statement if the conversion is not possible. \ No newline at end of file diff --git a/just_test/prompts/function_prompt_first_order.txt b/just_test/prompts/function_prompt_first_order.txt new file mode 100644 index 00000000..aa60ddbc --- /dev/null +++ b/just_test/prompts/function_prompt_first_order.txt @@ -0,0 +1,53 @@ +{# attribution to Marvin for the initial template #} +SYSTEM: Your job is to generate likely outputs for a Python function with the +following description: + +{{% if fn_definition %}} +{{ fn_definition }} +{{% endif %}} + +The function has the type: + +{{% if fn_type %}} +{{ fn_type }} +{{% endif %}} + + +The user will provide function inputs (if any) and you must respond with +the most likely result. + +e.g. 
`list_fruits(n: int) -> list[str]` (3) -> "apple", "banana", "cherry" + + +You will be given information of the following types: +{%for arg in parameters%} +- {{ arg.name }}: {{ arg.type }} +{% endfor %} + +{% if len(compositional_types)>0 %} +## The Definitions of Types In Current Context +{% for type in compositional_types %} +{{ type }} + +{% endfor %} +{% endif %} + + +HUMAN: + +## Function inputs + +{% if bound_parameters -%} +The function was called with the following inputs: +{%for (arg, value) in bound_parameters.items()%} +- {{ arg }}: {{ value }} +{% endfor %} +{% else %} +The function was not called with any inputs. +{% endif %} + +{{# figure out what tool calling means here #}} + +What is the function's output in JSON? + +ASSISTANT: \ No newline at end of file diff --git a/just_test/prompts/function_prompt_function_return_type.txt b/just_test/prompts/function_prompt_function_return_type.txt new file mode 100644 index 00000000..3a8fd462 --- /dev/null +++ b/just_test/prompts/function_prompt_function_return_type.txt @@ -0,0 +1,50 @@ +SYSTEM: Your job is to generate a good prompt for a Large Language Model AI given some arguments. +You must respond with a prompt where the user can supply some unseen arguments, and the AI will respond +appropriately. + +Essentially you are expected to provide a prompt that is **specialized** to the inputs given here. 
+ +{{% if fn_definition %}} +Some more details: +{{ fn_definition }} +{{% endif %}} + +You will be given information of the following types: +{%for arg in parameters%} +- {{ arg.name }}: {{ arg.type }} +{% endfor %} + +You need to reply with a prompt that represents a function with the following signature: + +{{output.type}} + +{{% if output_description %}} +This prompt should describe: +{{output_description }} +{{% endif %} + +{% if len(compositional_types)>0 %} +## The Definitions of Types In Current Context +{% for type in compositional_types %} +{{ type }} + +{% endfor %} +{% endif %} + + +HUMAN: + +## Function inputs + +{% if bound_parameters -%} +You are given the following inputs: +{%for (arg, value) in bound_parameters.items()%} +- {{ arg }}: {{ value }} +{% endfor %} +{% else %} +The function was not called with any inputs. +{% endif %} + +What is an appropriate prompt? + +ASSISTANT: A good prompt is \ No newline at end of file diff --git a/just_test/prompts/pattern_matching_top_level_strs.txt b/just_test/prompts/pattern_matching_top_level_strs.txt new file mode 100644 index 00000000..07c1e76d --- /dev/null +++ b/just_test/prompts/pattern_matching_top_level_strs.txt @@ -0,0 +1,29 @@ +{# Based on Marvin's Classification prompt #} +SYSTEM: + +# +Expert Classifier + +You are an expert classifier that always maintains as much semantic meaning as possible when labeling text. You use inference or deduction whenever necessary to understand missing or omitted data. Classify the provided data, text, or information as one of the provided labels. For boolean labels, consider "truthy" or affirmative inputs to be "true". + +HUMAN: + +## Text or data to classify + +{{ data }} + +{% if instructions -%} +## Additional instructions + +{{ instructions }} +{% endif %} + +## Labels + +You must classify the data as one of the following labels, which are numbered (starting from 0) and provide a brief description. Output the label number only. 
+{% for label in labels %} +- Label #{{ loop.index0 }}: {{ label }} +{% endfor %} +- Label {{ len(labels) }}: None of the above + +ASSISTANT: The best label for the data is Label diff --git a/just_test/prompts/predicate.txt b/just_test/prompts/predicate.txt new file mode 100644 index 00000000..28202ef2 --- /dev/null +++ b/just_test/prompts/predicate.txt @@ -0,0 +1,17 @@ +SYSTEM: +You are an expert at judging whether some piece of data satisfy some user given constraint. If the data adheres to the given constraint, you are expected to output {"outcome" : "success"}, otherwise, output {"outcome" : "failure"} + +USER: + +## Data + +## Constraint +You are given the following constraint(s) + +{{% for constraint in constraints %}} +{{ constraint }} +{{% endfor %}} + +## Output +Remember to only output JSON in the requested format. + diff --git a/just_test/prompts/predicate_explainable.txt b/just_test/prompts/predicate_explainable.txt new file mode 100644 index 00000000..276efeb6 --- /dev/null +++ b/just_test/prompts/predicate_explainable.txt @@ -0,0 +1,17 @@ +SYSTEM: +You are an expert at judging whether some piece of data satisfy some user given constraint. If the data adheres to the given constraint, you are expected to output {"reason": REASON, ""outcome" : "success"}, otherwise, output {"reason": REASON, "outcome" : "failure"} where the REASON shall denote the reason for your assessement and your thought process. But please be concise. + +USER: + +## Data + +## Constraint +You are given the following constraint(s) + +{{% for constraint in constraints %}} +{{ constraint }} +{{% endfor %}} + +## Output +Remember to only output JSON in the requested format. + diff --git a/just_test/prompts/property_extract.txt b/just_test/prompts/property_extract.txt new file mode 100644 index 00000000..f1fe7faa --- /dev/null +++ b/just_test/prompts/property_extract.txt @@ -0,0 +1,16 @@ +SYSTEM: + +You are an expert at extracting information from some data. 
You will be given some data and instruction, you are expected to extract the information based on the provided specification. You should use reasoning to deduce how the extraction should take place. You may also use your knowledge to provide limited information, if appropriate and confident. However, if there is certainly incomplete information for extraction, please output {"error" : It's not possible"} in JSON. Output *only* valid JSON in any case. For everything else: produce {"output" : GENERATED_OUTPUT} where GENERATED_OUTPUT is the result of the extraction. + +HUMAN: + +## Data To Extract From: + +{{ data }} + +## Extracting the Data Above Based On the Following Instruction + +{{ instruction}} + +## Output Format +Remember to only output JSON in the requested format. And give the correct error statement if the conversion is not possible. diff --git a/just_test/prompts/text_extract.txt b/just_test/prompts/text_extract.txt new file mode 100644 index 00000000..430bc9d7 --- /dev/null +++ b/just_test/prompts/text_extract.txt @@ -0,0 +1,25 @@ +SYSTEM: + +You are an expert at extracting information from some data based on a textual template. You will be given a text template that have certain template variables written using curly braces (e.g. {units}). You need to understand the data, and fill in the template variables as best as you can. You should use reasoning to deduce how the extraction should take place. You may also use your knowledge to provide limited information, if appropriate and confident. + +You will need to respond in ONLY JSON. If a template variable can not be found in the source data, please ignore it. If no template variables can be extracted, output the empty JSON object {}. 
+ +For example, if the data is {"type": "Purchase", date: "2024-05-26", product: "ice cream", flavor:"Chocolate"} and the textual template is "An {product} was purchased on {date} with {friend}" then you are expected to return + +{"product": "ice cream", "date" : "2024-05-26"} + +Notice how the "friend" field is not populated. + + +HUMAN: + +## Data To Extract From: + +{{ data }} + +## Template + +{{ instruction}} + +## Output Format +Remember to only output JSON in the requested format by extracting the relevant template variable. Do not include other fields in the JSON. \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 6ce4e6e1..4ceef003 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,6 +28,7 @@ dependencies = [ "tzdata>=2023.3", "uvicorn>=0.22.0", "partialjson>=0.0.5", + "docstring_parser>=0.16" ] [project.optional-dependencies] diff --git a/src/marvin/__init__.py b/src/marvin/__init__.py index b71b4725..aa669121 100644 --- a/src/marvin/__init__.py +++ b/src/marvin/__init__.py @@ -1,6 +1,9 @@ from .settings import settings from .ai.text import ( + predicate, + val_contract, + match, fn, cast, cast_async, @@ -13,6 +16,8 @@ generate_async, model, Model, + NaturalLangType, + func_contract, ) from .ai.images import paint, image from .ai.audio import speak_async, speak, speech, transcribe, transcribe_async @@ -29,6 +34,7 @@ __all__ = [ # --- text --- "Model", + "NaturalLangType", "cast", "cast_async", "classify", @@ -37,6 +43,10 @@ "extract", "extract_async", "fn", + "predicate", + "val_contract", + "func_contract", + "match", "generate", "generate_async", "model", diff --git a/src/marvin/ai/audio.py b/src/marvin/ai/audio.py index e35d6a99..451871f5 100644 --- a/src/marvin/ai/audio.py +++ b/src/marvin/ai/audio.py @@ -161,7 +161,7 @@ def speech( @wraps(fn) async def async_wrapper(*args, **kwargs): - model = PythonFunction.from_function_call(fn, *args, **kwargs) + model = PythonFunction.from_function_call(fn, {}, *args, **kwargs) 
return await speak_async( text=model.return_value, voice=voice, model_kwargs=model_kwargs ) diff --git a/src/marvin/ai/natural_lang_type.py b/src/marvin/ai/natural_lang_type.py new file mode 100644 index 00000000..c59351ef --- /dev/null +++ b/src/marvin/ai/natural_lang_type.py @@ -0,0 +1,10 @@ +from typing import List, Optional + +from pydantic import BaseModel, Field, type_adapter, model_validator + +import marvin +import marvin.ai.text + + +if __name__ == "__main__": + pass diff --git a/src/marvin/ai/prompts/text_prompts.py b/src/marvin/ai/prompts/text_prompts.py index 1ec44bb7..23f8955e 100644 --- a/src/marvin/ai/prompts/text_prompts.py +++ b/src/marvin/ai/prompts/text_prompts.py @@ -1,5 +1,6 @@ import inspect + CAST_PROMPT = inspect.cleandoc( """ SYSTEM: @@ -25,7 +26,7 @@ ## Response format - Call the `FormatResponse` tool to validate your response, and use the + Call the `FormatFinalResponse` tool to validate your response, and use the following schema: {{ response_format }} - When providing integers, do not write out any decimals at all @@ -64,7 +65,7 @@ ## Response format - Call the `FormatResponse` tool to validate your response, and use the + Call the `FormatFinalResponse` tool to validate your response, and use the following schema: {{ response_format }} - When providing integers, do not write out any decimals at all @@ -113,7 +114,7 @@ ## Response format - Call the `FormatResponse` tool to validate your response, and use the + Call the `FormatFinalResponse` tool to validate your response, and use the following schema: {{ response_format }} {% if previous_responses -%} @@ -142,7 +143,12 @@ as possible when labeling text. You use inference or deduction whenever necessary to understand missing or omitted data. Classify the provided data, text, or information as one of the provided labels. For boolean labels, - consider "truthy" or affirmative inputs to be "true". + consider "truthy" or affirmative inputs to be "true". 
If the label information + is a schema, then you are to determine if the source data likely contains enough + information to convert to that schema. The source information does not necessarily + have to be in that schema. If there are multiple choices that seem appropriate, + choose the most restrictive one, i.e. the one that has the most requirements or + constraints. HUMAN: @@ -151,14 +157,23 @@ {{ data }} {% if instructions -%} - ## Additional instructions + ## Additional Instructions {{ instructions }} {% endif %} + {% if additional_context -%} + ## Additional Context + + Here is some additional context, which may contain type definitions, type + constraints, or other information relevant for you to make your decision. + {{ additional_context }} + {% endif %} + ## Labels - You must classify the data as one of the following labels, which are numbered (starting from 0) and provide a brief description. Output the label number only. + You must classify the data as one of the following labels, which are numbered (starting from 0) + and provide a brief description. Output the label number only. {% for label in labels %} - Label #{{ loop.index0 }}: {{ label }} {% endfor %} @@ -168,7 +183,7 @@ """ ) -FUNCTION_PROMPT = inspect.cleandoc( +FUNCTION_PROMPT_FIRST_ORDER = inspect.cleandoc( """ SYSTEM: Your job is to generate likely outputs for a Python function with the following definition: @@ -180,19 +195,30 @@ e.g. `list_fruits(n: int) -> list[str]` (3) -> "apple", "banana", "cherry" - HUMAN: + {% if with_tool is defined and with_tool %} + The arguments that are functions are available for you to call through the tools and + functions. Feel free to call them when appropriate. 
+ {% endif %} + + HUMAN: ## Function inputs {% if bound_parameters -%} The function was called with the following inputs: {%for (arg, value) in bound_parameters.items()%} + {% if not value is is_func_type %} - {{ arg }}: {{ value }} + {% endif %} {% endfor %} {% else %} The function was not called with any inputs. {% endif %} + {% if with_tool is defined and with_tool %} + A reminder that the function arguments are available as tools. + {% endif %} + {% if return_value -%} ## Additional Context @@ -207,6 +233,78 @@ """ ) +FUNCTION_PROMPT_HIGHER_ORDER = inspect.cleandoc( + """ + SYSTEM: Your job is to generate a good prompt for a Large Language Model AI given some arguments. + You must respond with a prompt where the user can supply some unseen arguments, and the AI will respond + appropriately. + + {{ fn_definition }} + + {% if with_tool is defined and with_tool %} + The arguments that are functions are available for you to call through the tools and + functions. Feel free to call them when appropriate. + {% endif %} + + Essentially you are expected to provide a prompt that is **specialized** to the inputs that the user will give you. + + You need to reply with a prompt that describes a function in natural language with the following signature: + + {{ return_annotation }} + + + HUMAN: + + ## Function inputs + + {% if bound_parameters -%} + The function was called with the following inputs: + {%for (arg, value) in bound_parameters.items()%} + - {{ arg }}: {{ value if not value is is_func_type else "Refer to the tool provided" }} + {% endfor %} + {% else %} + The function was not called with any inputs. + {% endif %} + + {% if with_tool is defined and with_tool %} + A reminder that the function arguments are available as tools. + {% endif %} + + {% if return_value -%} + ## Additional Context + + I also preprocessed some of the data and have this additional context for you to consider: + + {{return_value}} + {% endif %} + + What is an appropriate prompt? 
+ + ASSISTANT: The good prompt is + """ +) + +MODEL_CONSTRAINT_PROMPT = inspect.cleandoc( + """ + SYSTEM: + You are an expert at determining if some data (Likely in JSON) + that the user supplies passes a set of constraints that is supplied. + + + HUMAN: + + ## Data + {{ data_type }} : + {{ data }} + + ## Constraints + + {% for constraint in constraints%} + - {{ constraint }} + {% endfor %} + """ +) + IMAGE_PROMPT = inspect.cleandoc( """ @@ -216,3 +314,97 @@ {{ context }} """ ) + +TRY_CAST_PROMPT = inspect.cleandoc( + """ + SYSTEM: + + # Expert Data Converter + + You are an expert data converter that always maintains as much semantic + meaning as possible. You use inference or deduction whenever necessary to + supply missing or omitted data. However, if the data that you are converting + to is wholly incompatible with the source data, or there are missing or omitted + data that is not obvious how to supply without hallucinating, then you should + not attempt to transform the provided data, text, or information, but instead + call the appropriate tool that represents a failure to transform. + Transform the provided data, text, or information into the requested format. + + HUMAN: + + ## Data to convert + + {{ data }} + + {% if instructions -%} + ## Additional instructions + + {{ instructions }} + {% endif %} + + ## Response format + + Call the `FormatFinalResponse` tool to validate your response, and use the + following schema: {{ response_format }} + - When providing integers, do not write out any decimals at all + - Use deduction where appropriate e.g. "3 dollars fifty cents" is a single + value [3.5] not two values [3, 50] unless the user specifically asks for + each part. + - When providing a string response, do not return JSON or a quoted string + unless they provided instructions requiring it. If you do return JSON, it + must be valid and parseable including double quotes. 
+ + Call the `FailedToConvert` tool if the data is wholly incompatible with the + response schema. +""" +) + +ADDITIONAL_TYPING_CONTEXT_PROMPT = inspect.cleandoc( + """ + {% for type_info in type_infos %} + ### Type Information for "{{type_info.name}}" + Schema: + {{ type_info.schema }} + Other Constraints: + {% for constraint in type_info.constraints %} + - {{ constraint }} + {% endfor %} + {% endfor %} + """ +) + +EXTRACT_TEXT_PROMPT = inspect.cleandoc(""" + SYSTEM: + You are an expert at extracting information from some data based on a textual template. + You will be given a text template that have certain template variables written using + curly braces (e.g. {units}). You need to understand the data, and fill in the template + variables as best as you can. You should use reasoning to deduce how the extraction should + take place. You may also use your knowledge to provide limited information, if appropriate + and confident. + + If a template variable can not be found in the source data, please ignore it. + + For example, if the data is: + {"type": "Purchase", date: "2024-05-26", product: "ice cream", flavor:"Chocolate"} + and the textual template is "An {product} was purchased on {date} with {friend}" + then you are expected to return: + + {"product": "ice cream", "date" : "2024-05-26", "friend" : None} + + Notice how the "friend" field is only populated with None. + + Call the `FormatFinalResponse` tool to validate your response. + + HUMAN: + + ## Data To Extract From: + + {{ data }} + + ## Template + + {{ textual_template }} + + ## Output Format + Remember to call the `FormatFinalResponse` tool to validate your response. + """) diff --git a/src/marvin/ai/text.py b/src/marvin/ai/text.py index ce4c6458..c865d402 100644 --- a/src/marvin/ai/text.py +++ b/src/marvin/ai/text.py @@ -2,24 +2,36 @@ Core LLM tools for working with text and structured data. 
""" +import collections.abc +import re import inspect -from collections import deque +import json +import types +import typing +from collections import deque, namedtuple from enum import Enum from functools import partial, wraps from typing import ( + Annotated, + Awaitable, Any, Callable, GenericAlias, + List, Literal, Optional, Type, TypeVar, Union, + get_args, get_origin, + Tuple, ) +import pydantic from cachetools import LRUCache -from pydantic import BaseModel +from openai.types.chat import ChatCompletionMessage +from pydantic import BaseModel, model_validator, Field, create_model, validate_call import marvin import marvin.utilities.tools @@ -31,19 +43,35 @@ CAST_PROMPT, CLASSIFY_PROMPT, EXTRACT_PROMPT, - FUNCTION_PROMPT, + FUNCTION_PROMPT_FIRST_ORDER, + FUNCTION_PROMPT_HIGHER_ORDER, GENERATE_PROMPT, + MODEL_CONSTRAINT_PROMPT, + ADDITIONAL_TYPING_CONTEXT_PROMPT, + EXTRACT_TEXT_PROMPT, ) from marvin.client.openai import AsyncMarvinClient, ChatCompletion, MarvinClient -from marvin.types import ChatRequest, ChatResponse +from marvin.settings import temporary_settings +from marvin.types import ( + ChatRequest, + ChatResponse, + FunctionTool, + BaseMessage as Message, + ToolMessage, + ToolOutput, + ChatCompletionMessage, + Predicate, +) from marvin.utilities.asyncio import run_sync from marvin.utilities.context import ctx from marvin.utilities.jinja import Transcript from marvin.utilities.logging import get_logger from marvin.utilities.mapping import map_async -from marvin.utilities.python import PythonFunction +from marvin.utilities.python import CallableWithMetaData, PythonFunction from marvin.utilities.strings import count_tokens +import docstring_parser + T = TypeVar("T") M = TypeVar("M", bound=BaseModel) @@ -63,6 +91,7 @@ async def generate_llm_response( prompt_kwargs: Optional[dict] = None, model_kwargs: Optional[dict] = None, client: Optional[AsyncMarvinClient] = None, + extra_messages: Optional[List[Message]] = None, ) -> ChatResponse: """ Generates a 
language model response based on a provided prompt template. @@ -81,21 +110,27 @@ async def generate_llm_response( client = client or AsyncMarvinClient() model_kwargs = model_kwargs or {} prompt_kwargs = prompt_kwargs or {} - messages = Transcript(content=prompt_template).render_to_messages(**prompt_kwargs) + extra_messages = extra_messages or [] + messages = ( + Transcript(content=prompt_template).render_to_messages(**prompt_kwargs) + + extra_messages + ) request = ChatRequest(messages=messages, **model_kwargs) if ctx.get("eject_request"): raise EjectRequest(request) if marvin.settings.log_verbose: logger.debug_kv("Request", request.model_dump_json(indent=2)) - response = await client.generate_chat(**request.model_dump()) + response = await client.generate_chat(request=request) if marvin.settings.log_verbose: logger.debug_kv("Response", response.model_dump_json(indent=2)) tool_outputs = _get_tool_outputs(request, response) return ChatResponse(request=request, response=response, tool_outputs=tool_outputs) -def _get_tool_outputs(request: ChatRequest, response: ChatCompletion) -> list[Any]: +def _get_tool_outputs( + request: ChatRequest, response: ChatCompletion +) -> List[ToolOutput]: outputs = [] tool_calls = response.choices[0].message.tool_calls or [] for tool_call in tool_calls: @@ -104,7 +139,13 @@ def _get_tool_outputs(request: ChatRequest, response: ChatCompletion) -> list[An function_name=tool_call.function.name, function_arguments_json=tool_call.function.arguments, ) - outputs.append(tool_output) + outputs.append( + ToolOutput( + tool_name=tool_call.function.name, + tool_id=tool_call.id, + output=tool_output, + ) + ) return outputs @@ -115,6 +156,8 @@ async def _generate_typed_llm_response_with_tool( prompt_kwargs: Optional[dict] = None, model_kwargs: Optional[dict] = None, client: Optional[AsyncMarvinClient] = None, + max_tool_usage_times: int = 1, + existing_tools: List[FunctionTool] = None, ) -> T: """ Generates a language model response based on a 
provided prompt template and a specific tool. @@ -139,27 +182,65 @@ async def _generate_typed_llm_response_with_tool( Returns: T: The generated response from the language model. """ + existing_tools = existing_tools or [] model_kwargs = model_kwargs or {} prompt_kwargs = prompt_kwargs or {} - tool = marvin.utilities.tools.tool_from_type(type_, tool_name=tool_name) - tool_choice = tool_choice = { - "type": "function", - "function": {"name": tool.function.name}, - } - model_kwargs.update(tools=[tool], tool_choice=tool_choice) - - # adding the tool parameters to the context helps GPT-4 pay attention to field - # descriptions. If they are only in the tool signature it often ignores them. - prompt_kwargs["response_format"] = tool.function.parameters + return_tool = marvin.utilities.tools.tool_from_type(type_, tool_name=tool_name) + model_didnt_call_function = False + new_messages = [] + while max_tool_usage_times > 0: + # The tool is the way to supply the response. If we are at our last generation we want to force the model's + # hand in generating and calling the response function alternatively, if the model didn't call any tool but + # just generated a bunch of messages, then the next iteration we better make sure it calls the right tool + tool_choice = ( + "auto" + if max_tool_usage_times > 1 and not model_didnt_call_function + else { + "type": "function", + "function": {"name": return_tool.function.name}, + } + ) + model_kwargs.update( + tools=[return_tool] + existing_tools, tool_choice=tool_choice + ) - response = await generate_llm_response( - prompt_template=prompt_template, - prompt_kwargs=prompt_kwargs, - model_kwargs=model_kwargs, - client=client, - ) + # adding the tool parameters to the context helps GPT-4 pay attention to field + # descriptions. If they are only in the tool signature it often ignores them. 
+ prompt_kwargs["response_format"] = return_tool.function.parameters - return response.tool_outputs[0] + response = await generate_llm_response( + prompt_template=prompt_template, + prompt_kwargs=prompt_kwargs, + model_kwargs=model_kwargs, + client=client, + extra_messages=new_messages, + ) + new_messages.append( + ChatCompletionMessage( + **(response.response.choices[0].message.model_dump(exclude_none=True)) + ) + ) + tool_outputs = response.tool_outputs + if len(tool_outputs) == 0: + model_didnt_call_function = True + + return_res = [ + tool_output.output + for tool_output in tool_outputs + if tool_output.tool_name == return_tool.function.name + ] + if return_res: + return return_res[0] + + new_messages.extend( + map( + lambda tool_output: ToolMessage( + content=tool_output.output, tool_call_id=tool_output.tool_id + ), + tool_outputs, + ) + ) + max_tool_usage_times -= 1 async def _generate_typed_llm_response_with_logit_bias( @@ -226,7 +307,7 @@ async def _generate_typed_llm_response_with_logit_bias( async def cast_async( - data: str, + data: any, target: type[T] = None, instructions: Optional[str] = None, model_kwargs: Optional[dict] = None, @@ -334,6 +415,7 @@ async def classify_async( data: str, labels: Union[Enum, list[T], type], instructions: str = None, + additional_context: str = None, model_kwargs: dict = None, client: Optional[AsyncMarvinClient] = None, ) -> T: @@ -350,6 +432,7 @@ async def classify_async( labels (Union[Enum, list[T], type]): The labels to classify the data into. instructions (str, optional): Specific instructions for the classification. Defaults to None. + additional_context(str, optional): Additional Context such as type information/constraints model_kwargs (dict, optional): Additional keyword arguments for the language model. Defaults to None. client (AsyncMarvinClient, optional): The client to use for the AI function. 
@@ -361,7 +444,12 @@ async def classify_async( model_kwargs = model_kwargs or {} return await _generate_typed_llm_response_with_logit_bias( prompt_template=CLASSIFY_PROMPT, - prompt_kwargs=dict(data=data, labels=labels, instructions=instructions), + prompt_kwargs=dict( + data=data, + labels=labels, + instructions=instructions, + additional_context=additional_context, + ), model_kwargs=model_kwargs | dict(temperature=0), client=client, ) @@ -449,6 +537,8 @@ def fn( func: Optional[Callable] = None, model_kwargs: Optional[dict] = None, client: Optional[MarvinClient] = None, + extra_render_parameters: Optional[dict] = None, + max_tool_usage_times: int = 0, ) -> Callable: """ Converts a Python function into an AI function using a decorator. @@ -461,6 +551,8 @@ def fn( model_kwargs (dict, optional): Additional keyword arguments for the language model. Defaults to None. client (MarvinClient, optional): The client to use for the AI function. + max_tool_usage_times: The maximum number of times a tool that is passed + in as an argument to the function could be used. Returns: Callable: The converted AI function. 
@@ -476,12 +568,22 @@ def list_fruit(n:int) -> list[str]: """ if func is None: - return partial(fn, model_kwargs=model_kwargs, client=client) + return partial( + fn, + model_kwargs=model_kwargs, + client=client, + extra_render_parameters=extra_render_parameters, + max_tool_usage_times=max_tool_usage_times, + ) @wraps(func) async def async_wrapper(*args, **kwargs): - model = PythonFunction.from_function_call(func, *args, **kwargs) + model = PythonFunction.from_function_call( + func, extra_render_parameters, *args, **kwargs + ) post_processor = marvin.settings.post_processor_fn + prompt_template = FUNCTION_PROMPT_FIRST_ORDER + extra_prompt_kwargs = {} # written instructions or missing annotations are treated as "-> str" if ( @@ -499,19 +601,84 @@ async def async_wrapper(*args, **kwargs): ) post_processor = lambda result: result.value # noqa E731 + # create a callable + elif typing.get_origin(model.return_annotation) is collections.abc.Callable: + type_ = pydantic.create_model( + "PromptAndName", + prompt=(str, pydantic.Field(description="Prompt Generated")), + function_name=( + str, + pydantic.Field( + description="Name of the function that " + "best reflect this prompt" + ), + ), + ) + args = get_args(model.return_annotation) + prompt_template = FUNCTION_PROMPT_HIGHER_ORDER + match args: + case []: + signature = inspect.Signature([], return_annotation=None) + case [param_annotations, return_annotations]: + params = [ + inspect.Parameter( + f"{t.__name__.strip()}{i}", + inspect.Parameter.POSITIONAL_OR_KEYWORD, + annotation=t, + ) + for i, t in enumerate(param_annotations) + ] + signature = inspect.Signature( + params, return_annotation=return_annotations + ) + # noinspection PyUnboundLocalVariable + extra_prompt_kwargs["return_annotation"] = f"{signature}" + + post_processor = lambda result: fn( # noqa E731 + CallableWithMetaData( + name=result.function_name, + signature=signature, + docstring=result.prompt, + ), + model_kwargs, + client, + ) else: type_ = 
model.return_annotation + func_args = filter( + lambda param_pair: isinstance(param_pair[1], types.FunctionType), + model.bound_parameters.items(), + ) + parsed_doc = docstring_parser.parse(model.docstring) + + def create_tool(arg_func_pair: Tuple[str, Callable]): + name, f = arg_func_pair + param_docs = [ + param for param in parsed_doc.params if param.arg_name == name + ] + param_doc = param_docs[0].description if param_docs else None + + return marvin.utilities.tools.tool_from_function( + fn=f, name=name, description=param_doc + ) + + tools = list(map(create_tool, func_args)) + result = await _generate_typed_llm_response_with_tool( - prompt_template=FUNCTION_PROMPT, + prompt_template=prompt_template, prompt_kwargs=dict( + with_tool=len(tools) > 0, fn_definition=model.definition, bound_parameters=model.bound_parameters, return_value=model.return_value, + **extra_prompt_kwargs, ), type_=type_, model_kwargs=model_kwargs, client=client, + existing_tools=tools, + max_tool_usage_times=max_tool_usage_times + 1, ) if post_processor is not None: @@ -529,6 +696,83 @@ def sync_wrapper(*args, **kwargs): return sync_wrapper +async def validate_natural_lang_constraints_async( + data: BaseModel, + constraints: List[str], + model_kwargs: Optional[dict] = None, + client: Optional[MarvinClient] = None, +): + result = await _generate_typed_llm_response_with_tool( + prompt_template=MODEL_CONSTRAINT_PROMPT, + prompt_kwargs=dict( + data=data.model_dump_json(), + data_type=type(data).__name__, + constraints=constraints, + ), + type_=bool, + model_kwargs=model_kwargs, + client=client, + ) + return result + + +def validate_natural_lang_constraints( + data: any, + constraints: List[str], + model_kwargs: Optional[dict] = None, + client: Optional[MarvinClient] = None, +): + return run_sync( + validate_natural_lang_constraints_async( + data, constraints, model_kwargs=model_kwargs, client=client + ) + ) + + +def predicate( + natural_lang_constraint="anything", + model_kwargs: Optional[dict] 
= None, + client: Optional[MarvinClient] = None, +): + def predicate_func(*args, **kwargs) -> bool: + """ + Check whether the data provided satisfies this constraint: + + {{ constraint }} + + Args: + *args: args that you need to validate against the constraint + **kwargs: kwargs that you need to validate against the constraint + + Returns: + a bool that represents if the data satisfies the constraint given + """ + + new_f = fn( + predicate_func, + model_kwargs=model_kwargs, + client=client, + extra_render_parameters={"constraint": natural_lang_constraint}, + ) + return Predicate(func=new_f, constraint=natural_lang_constraint) + + +def val_contract( + natural_lang_constraint="anything", + model_kwargs: Optional[dict] = None, + client: Optional[MarvinClient] = None, +): + def wrapper(*args, **kwargs): + if marvin.settings.ai.text.disable_contract: + return True + else: + return predicate(natural_lang_constraint, model_kwargs, client)( + *args, **kwargs + ) + + return wrapper + + class Model(BaseModel): """ A Pydantic model that can be instantiated from a natural language string, in @@ -707,7 +951,7 @@ def __init__(self, *args, **kwargs): def cast( - data: str, + data: any, target: type[T] = None, instructions: Optional[str] = None, model_kwargs: Optional[dict] = None, @@ -750,6 +994,7 @@ def classify( data: str, labels: Union[Enum, list[T], type], instructions: str = None, + additional_context: str = None, model_kwargs: dict = None, client: Optional[AsyncMarvinClient] = None, ) -> T: @@ -766,6 +1011,7 @@ def classify( labels (Union[Enum, list[T], type]): The labels to classify the data into. instructions (str, optional): Specific instructions for the classification. Defaults to None. + additional_context(str, optional): Additional Context such as type information/constraints model_kwargs (dict, optional): Additional keyword arguments for the language model. Defaults to None. client (AsyncMarvinClient, optional): The client to use for the AI function. 
@@ -778,6 +1024,7 @@ def classify( data=data, labels=labels, instructions=instructions, + additional_context=additional_context, model_kwargs=model_kwargs, client=client, ) @@ -908,7 +1155,7 @@ def classify_map( async def cast_async_map( - data: list[str], + data: list, target: type[T] = None, instructions: Optional[str] = None, model_kwargs: Optional[dict] = None, @@ -927,7 +1174,7 @@ async def cast_async_map( def cast_map( - data: list[str], + data: list, target: type[T] = None, instructions: Optional[str] = None, model_kwargs: Optional[dict] = None, @@ -981,6 +1228,290 @@ def extract_map( ) +class NaturalLangType(BaseModel): + other_information: Optional[str] = Field( + description="Other information about the current data that could be " + "relevant but is not otherwise captured by the other fields. " + "Completely Optional!", + default=None, + ) + + async def property_async( + self, + description: str, + target: Type[T] = None, + model_kwargs: Optional[dict] = None, + client: Optional[AsyncMarvinClient] = None, + ): + return await extract_async( + self, + target=target, + instructions=description, + model_kwargs=model_kwargs, + client=client, + ) + + def property( + self, + description: str, + target: Type[T] = None, + model_kwargs: Optional[dict] = None, + client: Optional[AsyncMarvinClient] = None, + ): + return run_sync( + self.property_async( + description, target=target, model_kwargs=model_kwargs, client=client + ) + ) + + @classmethod + def natural_lang_constraints(cls) -> List[str]: + """ + This is a function where all child classes should override if they wish + to declare additional natural language constraints. Note that the overridden class must + call this method on the super() object to ensure that all constraints are populated appropriately + from the parents unless explicitly overridden. + existing = super().natural_lang_constraints() + ... 
+ return existing + new_constraints + """ + + return [] + + @model_validator(mode="after") + def check_all_natural_lang_constraints(self): + if marvin.settings.ai.text.disable_contract: + return self + constraints = self.__class__.natural_lang_constraints() + if not constraints: + return self + if marvin.ai.text.validate_natural_lang_constraints(self, constraints): + return self + else: + raise ValueError( + "Natural language constraints not met:" + + "\n".join(self.__class__.natural_lang_constraints()) + + "\n" + ) + + +async def match_async( + data: any, + *match_terms: Tuple[Union[type, str], Union[Callable, Awaitable]], + fall_through: Optional[Callable] = None, + model_kwargs: dict = None, + client: Optional[AsyncMarvinClient] = None, +): + contract_setting = marvin.settings.ai.text.disable_contract + with temporary_settings(ai__text__disable_contract=True): + TypeInfo = namedtuple("TypeInfo", "name schema constraints") + defined_types: List[Type[BaseModel]] = [] + match_labels: List[str] = [] + continuations = [] + for match_term, match_func in match_terms: + if isinstance(match_term, str): + match_labels.append(match_term) + + async def continuation(match_term_inner, match_func_inner): + terms_regex = r"\{([^}]*)\}" + match_groups = re.findall(terms_regex, match_term_inner) + # noinspection PyPep8Naming + MatchedResult = create_model( + "MatchedResult", + **{name: (Any, None) for name in match_groups}, + ) + matched_result = await _generate_typed_llm_response_with_tool( + prompt_template=EXTRACT_TEXT_PROMPT, + type_=MatchedResult, + prompt_kwargs=dict( + data=data, textual_template=match_term_inner + ), + model_kwargs=model_kwargs, + client=client, + ) + matched_dict = matched_result.dict() + with temporary_settings( + ai__text__disable_contract=contract_setting + ): + if inspect.iscoroutinefunction(match_func_inner): + return await match_func_inner(**matched_dict) + else: + return match_func_inner(**matched_dict) + + continuations.append((continuation, 
(match_term, match_func))) + elif isinstance(match_term, type): + typing_origin = typing.get_origin(match_term) + typing_args = typing.get_args(match_term) + additional_constraint = "" + if typing_origin and typing_origin is Annotated: + predicates: List[Predicate] = list( + filter(lambda type_arg: type_arg is Predicate, typing_args) + ) + if predicates: + additional_constraint = predicates[0].constraint + if typing_args: + match_term = typing_args[0] + if match_term is int: + label = "An Integer" + elif match_term is str: + label = "A String" + elif match_term is dict: + label = "A Dictionary" + elif match_term is list: + label = "A list" + elif typing_origin is list: + of_type = typing_args[0] + if issubclass(of_type, BaseModel): + defined_types.append(of_type) + label = f"A list of {of_type.__name__}" + elif typing_origin is dict: + index_type = typing_args[0] + value_type = typing_args[1] + if issubclass(index_type, BaseModel): + defined_types.append(index_type) + if issubclass(index_type, value_type): + defined_types.append(value_type) + label = f"A Dictionary from {index_type.__name__} to {value_type.__name__}" + elif issubclass(match_term, BaseModel): + label = f"Something of {match_term.__name__} type" + defined_types.append(match_term) + else: + raise ValueError("Unrecognized type") + if additional_constraint: + final_label = ( + f"{label} with the constraint that {additional_constraint}" + ) + else: + final_label = label + match_labels.append(final_label) + + async def continuation(match_term_inner, match_func_inner): + casted = await cast_async( + data, match_term_inner, model_kwargs=model_kwargs, client=client + ) + with temporary_settings( + ai__text__disable_contract=contract_setting + ): + if inspect.iscoroutinefunction(match_func_inner): + return await match_func_inner(casted) + else: + return match_func_inner(casted) + + continuations.append((continuation, (match_term, match_func))) + else: + raise ValueError("Match Term must be either a string or 
def match(
    data: Any,
    *match_terms: Tuple[Union[type, str], Callable],
    fall_through: Optional[Callable] = None,
    model_kwargs: Optional[dict] = None,
    client: Optional[AsyncMarvinClient] = None,
):
    """Synchronous wrapper around ``match_async``.

    Args:
        data: The value to be classified/matched against ``match_terms``.
        *match_terms: ``(term, callback)`` pairs where ``term`` is either a
            natural-language string or a type; the callback for the best
            match is invoked with the (possibly cast) data.
        fall_through: Optional zero-argument callback used when no term matches.
        model_kwargs: Extra keyword arguments forwarded to the model call.
        client: Optional async Marvin client to use for the underlying calls.

    Returns:
        Whatever the selected continuation callback returns.
    """
    # NOTE: annotation fixed from the builtin `any` (a function) to `typing.Any`.
    return run_sync(
        match_async(
            data,
            *match_terms,
            fall_through=fall_through,
            model_kwargs=model_kwargs,
            client=client,
        )
    )


def func_contract(
    func: Callable = None,
    pre: Callable = None,
    post: Callable = None,
    validate_return: bool = False,
    model_kwargs: Optional[dict] = None,
    client: Optional[AsyncMarvinClient] = None,
):
    """Decorator enforcing pre-/post-conditions around ``func``.

    ``pre`` receives (by name) the subset of the call's bound arguments that
    match its own parameter names and must return truthy for the call to
    proceed. ``post`` receives the same plus ``result`` (the return value)
    and must return truthy for the result to be returned.

    Args:
        func: The function to wrap; when ``None`` the decorator is being
            applied with arguments and a partially-applied decorator is returned.
        pre: Optional pre-condition predicate.
        post: Optional post-condition predicate (also receives ``result``).
        validate_return: Forwarded to pydantic's ``validate_call``.
        model_kwargs: Reserved for model-call configuration (currently unused here).
        client: Reserved for a custom client (currently unused here).

    Raises:
        ValueError: If the pre- or post-condition evaluates falsy.
    """
    if func is None:
        # BUG FIX: the original partial dropped `validate_return`, so
        # `@func_contract(validate_return=True)` silently disabled return
        # validation. Forward it explicitly.
        return partial(
            func_contract,
            pre=pre,
            post=post,
            validate_return=validate_return,
            model_kwargs=model_kwargs,
            client=client,
        )

    # pydantic-validated call (argument and, optionally, return validation).
    inner_func = validate_call(func, validate_return=validate_return)

    @wraps(func)
    def wrapper(*args: Any, **kwargs: Any) -> Any:
        # Contracts can be globally disabled via settings; bypass validation
        # entirely in that case and call the raw function.
        if marvin.settings.ai.text.disable_contract:
            return func(*args, **kwargs)

        signature = inspect.signature(func)

        # Merge args and kwargs into a single dictionary for easier processing.
        bound_arguments = signature.bind(*args, **kwargs)
        bound_arguments.apply_defaults()
        all_arguments = bound_arguments.arguments

        if pre:
            pre_sig = inspect.signature(pre)
            # Only pass the arguments the predicate actually declares.
            pre_params = [
                p for p in pre_sig.parameters.values() if p.name in all_arguments
            ]
            pre_dict = {p.name: all_arguments[p.name] for p in pre_params}
            if not pre(**pre_dict):
                raise ValueError("Pre condition not met")

        result = inner_func(*args, **kwargs)

        if post:
            post_sig = inspect.signature(post)
            post_params = [
                p for p in post_sig.parameters.values() if p.name in all_arguments
            ]
            # BUG FIX: the original keyed this dict by `inspect.Parameter`
            # objects (`{key: all_arguments[key] ...}`), which raises KeyError
            # because `all_arguments` is keyed by str names. Use `.name`,
            # matching the pre-condition path above.
            post_dict = {p.name: all_arguments[p.name] for p in post_params}
            post_dict["result"] = result
            if not post(**post_dict):
                raise ValueError("Post Condition Not Met")
        return result

    return wrapper


cast_async.map = cast_async_map
cast.map = cast_map
classify_async.map = classify_async_map
-6,12 +6,10 @@ class Marvin: @overload - def __new__(cls: type[Self], client: "Client") -> "MarvinClient": - ... + def __new__(cls: type[Self], client: "Client") -> "MarvinClient": ... @overload - def __new__(cls: type[Self], client: "AsyncClient") -> "AsyncMarvinClient": - ... + def __new__(cls: type[Self], client: "AsyncClient") -> "AsyncMarvinClient": ... def __new__( cls: type[Self], client: Union["Client", "AsyncClient"] diff --git a/src/marvin/client/openai.py b/src/marvin/client/openai.py index c4691123..a7776e7d 100644 --- a/src/marvin/client/openai.py +++ b/src/marvin/client/openai.py @@ -275,6 +275,7 @@ def wrap(cls, client: AsyncClient) -> "AsyncClient": async def generate_chat( self, stream_callback: Optional[Callable[[Message], None]] = None, + request: ChatRequest = None, **kwargs: Any, ) -> Union["ChatCompletion", T]: create = self.client.chat.completions.create @@ -284,11 +285,14 @@ async def generate_chat( kwargs.setdefault("stream", True) # validate request - request = ChatRequest(**kwargs) + if request is None: + request = ChatRequest(**kwargs) + else: + request = request.copy(update=kwargs) + try: - response: "ChatCompletion" = await create( - **request.model_dump(exclude_none=True) - ) + model_request = request.model_dump(exclude_none=True) + response: "ChatCompletion" = await create(**model_request) except NotFoundError as e: if await should_fallback(e, request): response = await create( diff --git a/src/marvin/settings.py b/src/marvin/settings.py index f8aedd80..8f260d71 100644 --- a/src/marvin/settings.py +++ b/src/marvin/settings.py @@ -213,6 +213,7 @@ def discover_api_key(cls, v): class TextAISettings(MarvinSettings): model_config = SettingsConfigDict(env_prefix="marvin_ai_text_", extra="ignore") + disable_contract: bool = True generate_cache_token_cap: int = Field(600) diff --git a/src/marvin/tools/filesystem.py b/src/marvin/tools/filesystem.py index 9e43ef96..0c9764c5 100644 --- a/src/marvin/tools/filesystem.py +++ 
b/src/marvin/tools/filesystem.py @@ -49,9 +49,9 @@ def write_lines( if mode == "insert": lines[insert_line:insert_line] = contents.splitlines(True) elif mode == "overwrite": - lines[ - insert_line : insert_line + len(contents.splitlines()) - ] = contents.splitlines(True) + lines[insert_line : insert_line + len(contents.splitlines())] = ( + contents.splitlines(True) + ) else: raise ValueError(f"Invalid mode: {mode}") with open(path, "w") as f: diff --git a/src/marvin/types.py b/src/marvin/types.py index 557c3fa3..b4b2c39a 100644 --- a/src/marvin/types.py +++ b/src/marvin/types.py @@ -4,8 +4,11 @@ from typing import Any, Callable, Generic, Literal, Optional, TypeVar, Union import openai.types.chat -from openai.types.chat import ChatCompletion -from pydantic import BaseModel, Field, PrivateAttr, computed_field +from openai.types.chat import ( + ChatCompletion, + ChatCompletionMessageToolCall, +) +from pydantic import BaseModel, Field, PrivateAttr, computed_field, SerializeAsAny from typing_extensions import Annotated, Self from marvin.settings import settings @@ -68,6 +71,12 @@ class FunctionTool(Tool, Generic[T]): function: Optional[Function[T]] = None +class ToolOutput(MarvinType): + output: Any + tool_id: str + tool_name: str + + class ToolSet(MarvinType, Generic[T]): tools: Optional[list[Union[FunctionTool[T], Tool]]] = None tool_choice: Optional[Union[Literal["auto"], dict[str, Any]]] = None @@ -109,10 +118,23 @@ class TextContentBlock(MarvinType): class BaseMessage(MarvinType): """Base schema for messages""" - content: Union[str, list[Union[ImageFileContentBlock, TextContentBlock]]] + content: Optional[Union[str, list[Union[ImageFileContentBlock, TextContentBlock]]]] role: str +class ToolMessage(BaseMessage): + """Schema for Messages pertaining to the result of calling a tool""" + + role: Literal["tool"] = "tool" + tool_call_id: str + + +class ChatCompletionMessage(BaseMessage): + role: Literal["assistant"] + content: Optional[str] = None + tool_calls: 
Optional[list[ChatCompletionMessageToolCall]] = None + + class Grammar(MarvinType): logit_bias: Optional[LogitBias] = None max_tokens: Optional[Annotated[int, Field(strict=True, ge=1)]] = None @@ -120,12 +142,12 @@ class Grammar(MarvinType): class Prompt(Grammar, ToolSet[T], Generic[T]): - messages: list[BaseMessage] = Field(default_factory=list) + messages: list[SerializeAsAny[BaseMessage]] = Field(default_factory=list) class ResponseModel(MarvinType): model: type - name: str = Field(default="FormatResponse") + name: str = Field(default="FormatFinalResponse") description: str = Field(default="Response format") @@ -181,9 +203,9 @@ class TranscriptRequest(MarvinType): " supplying spelling of complex words, including filler vocalizations, etc." ), ) - response_format: Optional[ - Literal["json", "text", "srt", "verbose_json", "vtt"] - ] = None + response_format: Optional[Literal["json", "text", "srt", "verbose_json", "vtt"]] = ( + None + ) language: Optional[str] = None temperature: Optional[float] = None @@ -192,7 +214,7 @@ class ChatResponse(MarvinType): model_config = dict(arbitrary_types_allowed=True) request: Union[ChatRequest, VisionRequest] response: ChatCompletion - tool_outputs: list[Any] = [] + tool_outputs: list[ToolOutput] = [] class ImageRequest(MarvinType): @@ -352,3 +374,11 @@ def play(self): import marvin.audio marvin.audio.play_audio(self.data) + + +class Predicate(MarvinType): + func: Callable + constraint: str + + def __call__(self, *args, **kwargs): + return self.func(*args, **kwargs) diff --git a/src/marvin/utilities/jinja.py b/src/marvin/utilities/jinja.py index 7501d154..51ab299c 100644 --- a/src/marvin/utilities/jinja.py +++ b/src/marvin/utilities/jinja.py @@ -2,9 +2,10 @@ import inspect import re +import types from datetime import datetime from functools import cached_property -from typing import Any, ClassVar, Pattern, Union +from typing import Any, ClassVar, Pattern, Union, Callable from zoneinfo import ZoneInfo from jinja2 import 
Environment as JinjaEnvironment @@ -57,6 +58,10 @@ class BaseEnvironment(BaseModel): } ) + def __init__(self, tests: dict[str, Callable[[any], bool]]): + super().__init__() + self.environment.tests = self.environment.tests | tests + @model_validator(mode="after") def setup_globals(self: Self) -> Self: self.environment.globals.update(self.globals) # type: ignore @@ -86,7 +91,11 @@ def render(self, template: Union[str, BaseTemplate], **kwargs: Any) -> str: return template.render(**kwargs).strip() -Environment = BaseEnvironment() +def is_func_type(value): + return isinstance(value, types.FunctionType) + + +Environment = BaseEnvironment(tests={"is_func_type": is_func_type}) def split_text_by_tokens( diff --git a/src/marvin/utilities/python.py b/src/marvin/utilities/python.py index 253fa011..120fdd2d 100644 --- a/src/marvin/utilities/python.py +++ b/src/marvin/utilities/python.py @@ -14,6 +14,32 @@ class ParameterModel(BaseModel): default: Optional[str] +class CallableWithMetaData(BaseModel): + name: str + signature: inspect.Signature + docstring: Optional[str] = "" + func: Optional[Callable] = None + + class Config: + arbitrary_types_allowed = True + + @property + def __name__(self): + return self.name + + @property + def __signature__(self): + return self.signature + + @property + def __doc__(self): + return self.docstring + + def __call__(self, *args, **kwargs): + if self.func: + return self.func(*args, **kwargs) + + class PythonFunction(BaseModel): """ A Pydantic model representing a Python function. 
@@ -86,7 +112,9 @@ def from_function(cls, func: Callable, **kwargs) -> "PythonFunction": ) for name, param in sig.parameters.items() ] - source_code = inspect.getsource(func).strip() + source_code = "" + if not isinstance(func, CallableWithMetaData): + source_code = inspect.getsource(func).strip() function_dict = { "function": func, @@ -103,18 +131,29 @@ def from_function(cls, func: Callable, **kwargs) -> "PythonFunction": return cls(**function_dict) @classmethod - def from_function_call(cls, func: Callable, *args, **kwargs) -> "PythonFunction": + def from_function_call( + cls, + func: Callable, + extra_render_parameters: Optional[dict], + *args, + **kwargs, + ) -> "PythonFunction": """ Create a PythonFunction instance from a function call. Args: func (Callable): The function to call. + extra_render_parameters: extra parameters to be used in the rendering of the prompts *args: Positional arguments to pass to the function call. **kwargs: Keyword arguments to pass to the function call. Returns: PythonFunction: The created PythonFunction instance, with the return value of the function call set as an attribute. 
""" + extra_render_parameters = ( + extra_render_parameters if extra_render_parameters else {} + ) + sig = inspect.signature(func) bound = sig.bind(*args, **kwargs) @@ -126,7 +165,8 @@ def from_function_call(cls, func: Callable, *args, **kwargs) -> "PythonFunction" # render the docstring with the bound arguments, if it was supplied as jinja docstring = Environment.render( - func.__doc__ or "", **dict(bound.arguments.items()) + func.__doc__ or "", + **(dict(bound.arguments.items()) | extra_render_parameters), ) instance = cls.from_function( diff --git a/src/marvin/utilities/slack.py b/src/marvin/utilities/slack.py index 4faa6e41..6bcb8418 100644 --- a/src/marvin/utilities/slack.py +++ b/src/marvin/utilities/slack.py @@ -1,4 +1,5 @@ """Module for Slack-related utilities.""" + import os import re from typing import List, Optional, Union diff --git a/src/marvin/utilities/tools.py b/src/marvin/utilities/tools.py index d9485e91..231b2435 100644 --- a/src/marvin/utilities/tools.py +++ b/src/marvin/utilities/tools.py @@ -14,7 +14,7 @@ import pydantic from pydantic import BaseModel, TypeAdapter, create_model -from pydantic.fields import FieldInfo +from pydantic.fields import FieldInfo, Field from pydantic.json_schema import GenerateJsonSchema, JsonSchemaMode from marvin.types import Function, FunctionTool @@ -63,7 +63,9 @@ def generate(self, schema: Any, mode: JsonSchemaMode = "validation"): return json_schema -def tool_from_type(type_: U, tool_name: str = None) -> FunctionTool[U]: +def tool_from_type( + type_: U, tool_name: str = None, chain_of_thought: bool = False +) -> FunctionTool[U]: """ Creates an OpenAI-compatible tool from a Python type. 
""" @@ -72,12 +74,23 @@ def tool_from_type(type_: U, tool_name: str = None) -> FunctionTool[U]: metadata = next(iter(annotated_metadata)) else: metadata = FieldInfo(description="The formatted response") + from collections import OrderedDict + + fields = OrderedDict() + if chain_of_thought: + fields["reason"] = ( + str, + Field( + description="What is the reason for the value generated. Think step by step" + ), + ) + fields["value"] = (type_, metadata) model = create_model( - tool_name or "FormatResponse", - __doc__="Format the response with valid JSON.", + tool_name or "FormatFinalResponse", + __doc__="Format the final response with valid JSON.", __module__=__name__, - **{"value": (type_, metadata)}, + **fields, ) def tool_fn(**data) -> U: