diff --git a/data/proof-bench.jsonl b/data/proof-bench.jsonl new file mode 100644 index 0000000..8ac2d55 --- /dev/null +++ b/data/proof-bench.jsonl @@ -0,0 +1,2 @@ +{"id": "algebraicGeometry_vakil_ch_11_2_E", "file": "algebraicGeometry_vakil_ch_11_2_E.lean", "source": "Vakil, The Rising Sea : Foundations of Algebraic Geometry (Princeton University Press, 2025), p. 316, Example 11.2.E.", "statement": "Show that an $A$-scheme is separated (over $A$) if and only if it is separated over $\\mathbb{Z}$.", "header": "import Mathlib\n\nopen AlgebraicGeometry\n\nvariable {A : Type*} {X : Scheme} [CommRing A]\n (struct_X : X ⟶ Spec (CommRingCat.of A))", "formal": "theorem algebraicGeometry_vakil_ch_11_2_E :\n IsSeparated struct_X ↔ IsSeparated (instOverTerminalScheme X).hom :=", "nl_proof": "Let $Y=\\text{Spec}(A)$ and $Z = \\text{Spec}(\\mathbb{Z})$ (the terminal object), let $f: X \\to Y$ be the structure morphism of $X$ as an $A$-scheme, let $g: Y \\to Z$ be the morphism $\\text{Spec}(A) \\to \\text{Spec}(\\mathbb{Z})$\n\nThe key point is the following cancellation property for separatedness: If $g$ is separated, then $(g \\circ f)$ is separated $\\iff$ $f$ is separated.\n\nThe composition $(g \\circ f): X \\to Z$ is the structure morphism of $X$ as a $\\mathbb{Z}$-scheme. Since the $g$ is a morphism between affine schemes, it is automatically separated, the proposition applies.\n\nTherefore, we conclude that $X$ is separated over $\\mathbb{Z}$ $\\iff$ $X$ is separated over $A$."} +{"id": "measure_dembo_1-1-5.lean", "file": "measure_dembo_1-1-5.lean.lean", "source": "Probability Theory: STAT310/MATH230 by Amir Dembo, April 15, 2021, Exercise 1.1.5", "statement": "Prove that a finitely additive non-negative set function \\(\\mu\\) on a measurable space \n\\((\\Omega, \\mathcal{F})\\) with the ``continuity'' property\n\\[B_n \\in \\mathcal{F}, B_n \\downarrow \\emptyset, \\mu(B_n) < \\infty \\to \\mu(B_n) \\to 0\\]\nmust be countably additive if \\(\\mu(\\Omega) < \\infty\\).", "header": "import Mathlib\n\nopen TopologicalSpace Filter MeasureTheory ProbabilityTheory Function\n\nopen scoped NNReal ENNReal MeasureTheory ProbabilityTheory Topology\n\nnamespace MeasureTheory\n\nvariable {Ω : Type*} [MeasurableSpace Ω]", "formal": "theorem dembo_1_1_15 {μ : Set Ω → ℝ≥0}\n (hAdd : ∀ A B, MeasurableSet A → MeasurableSet B → Disjoint A B → μ (A ∪ B) = μ A + μ B)\n (hCont : ∀ B : ℕ → Set Ω,\n (∀ n, MeasurableSet (B n)) → Antitone B → (⋂ n, B n) = ∅\n → Tendsto (fun n => μ (B n)) atTop (𝓝 0))\n {A : ℕ → Set Ω} (hA : ∀ n, MeasurableSet (A n)) (hDisj : Pairwise (Disjoint on A)) :\n μ (⋃ n, A n) = ∑' n, μ (A n) :=", "nl_proof": "Let \\(\\{A_n\\}_{n=1}^\\infty\\) be a sequence of pairwise disjoint sets in $\\mathcal{F}$ and let\n\\(A = \\bigcup_{n=1}^\\infty A_n.\\)\nSince $\\mu$ is finitely additive, we have for each \\(N\\)\n\\[\\mu\\left( \\bigcup_{n=1}^N A_n \\right) = \\sum_{n=1}^N \\mu(A_n).\\]\nDefine \\(B_N := \\bigcup_{n=N+1}^\\infty A_n\\).\nThen \\(A = \\left( \\bigcup_{n=1}^N A_n \\right) \\cup B_N\\), with disjoint union. Hence, by finite \nadditivity,\n\\[\\mu(A) = \\sum_{n=1}^N \\mu(A_n) + \\mu(B_N).\\]\nSince \\(B_N \\downarrow \\emptyset\\) as \\(N \\to \\infty\\), and \\(\\mu(B_N) \\leq \\mu(A) \\leq \\mu(\\Omega) \n< \\infty\\), the ``continuity from above'' property implies \\(\\mu(B_N) \\to 0.\\)\nTherefore,\n\\[\\mu(A) = \\lim_{N \\to \\infty} \\sum_{n=1}^N \\mu(A_n) = \\sum_{n=1}^\\infty \\mu(A_n),\\]\nwhich proves that $\\mu$ is countably additive."} diff --git a/proof_bench/metadata_utils.py b/proof_bench/metadata_utils.py index 8ab8422..f2fd194 100644 --- a/proof_bench/metadata_utils.py +++ b/proof_bench/metadata_utils.py @@ -33,6 +33,14 @@ def _extract_theorem_info(content: str) -> tuple[str, str]: return header, formal +def _informal_base_name(file_path: Path) -> str: + """Stem used for informal/*.tex; strips repeated .lean (e.g. foo.lean.lean → foo).""" + name = file_path.stem + while name.endswith(".lean"): + name = name[: -len(".lean")] + return name + + def _read_informal_file(base_dir: Path, problem_name: str, suffix: str) -> str: """Read an informal .tex file for a problem, returning empty string when unavailable.""" informal_dir = base_dir / "problems" / "informal" @@ -82,8 +90,9 @@ def export_jsonl( header, formal = _extract_theorem_info(content) problem_name = file_path.stem - nl_proof = _find_nl_proof(base, problem_name) - statement = _find_statement(base, problem_name) + informal_key = _informal_base_name(file_path) + nl_proof = _find_nl_proof(base, informal_key) + statement = _find_statement(base, informal_key) entry = { "id": problem_name,