diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..4080d07 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/.lake/ diff --git a/Main.lean b/Main.lean index 24ffa81..6bf41a4 100644 --- a/Main.lean +++ b/Main.lean @@ -1,3 +1,9 @@ +/- +Copyright (c) 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Authors: John Tristan, Paul Govereau, Sean McLaughlin +-/ + import TensorLib def main : IO Unit := diff --git a/TensorLib.lean b/TensorLib.lean index 97b134e..5bbb853 100644 --- a/TensorLib.lean +++ b/TensorLib.lean @@ -1,3 +1,9 @@ +/- +Copyright (c) 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Authors: John Tristan, Paul Govereau, Sean McLaughlin +-/ + -- This module serves as the root of the `TensorLib` library. -- Import modules here that should be built as part of the library. -import TensorLib.Basic \ No newline at end of file +import TensorLib.Basic diff --git a/TensorLib/Basic.lean b/TensorLib/Basic.lean index cdd5b86..2d584bd 100644 --- a/TensorLib/Basic.lean +++ b/TensorLib/Basic.lean @@ -1,3 +1,12 @@ +/- +Copyright (c) 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Authors: John Tristan, Paul Govereau, Sean McLaughlin +-/ + +import TensorLib.TensorData +import TensorLib.TensorElement + namespace TensorLib def hello := "world" diff --git a/TensorLib/TensorData.lean b/TensorLib/TensorData.lean new file mode 100644 index 0000000..ac41d89 --- /dev/null +++ b/TensorLib/TensorData.lean @@ -0,0 +1,180 @@ +/- +Copyright (c) 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Authors: John Tristan, Paul Govereau, Sean McLaughlin +-/ +import Mathlib.Tactic +import TensorLib.TensorElement + +namespace TensorLib + +/-! +This file defines TensorData, the raw data underlying a Tensor. +We are representing it today as nested linked lists. We predict +this will be easier to reason about, but will be slow to evaluate. +We intend to have a more compact and faster-to-access representation +in addition to this one in the future for execution. +-/ + +-- I tried making this implicit with {} but I needed to add @ everwhere +-- and give it the argument anyway. Maybe there is a way to infer at least +-- within a section +variable (a: Type) + +-- I wonder if writing all these out will get old. Can Lean infer the Add/Sub/etc. +-- somehow? +variable [addI: Add a][subI: Sub a][mulI: Mul a][divI: Div a][negI: Neg a] + [tensorElementI: TensorElement a] + +-- I tried `td_node (x: TensorData) (xs: List TensorData)` to remove the redundant empty node +-- from the type. (We can already represent the 0 shape with `td_root []`.) +-- This design works, but this way the examples are more uniform; it looked weird to +-- syntactically preference the first of every node list. The code has some annoying special +-- cases for the empty node now, though, so it may be worth revisiting this. +inductive TensorData where +| td_root (xs: List a) +| td_node (xs: List TensorData) + +abbrev Shape := List Nat + +-- TODO: Make tail-recursive, perhaps when Lean will eliminite tail calls in mutual-recursion + def TensorData.hasShape (x: TensorData a) (shape: Shape): Bool := + match x, shape with + | .td_root y, List.cons n .nil => List.length y == n + | .td_node ys, List.cons n shape' => + List.length ys == n && + -- `attach` is used in proofs below, e.g. HashShapeOk + List.all ys.attach (fun ⟨ y, _ ⟩ => hasShape y shape') + | _, _ => false + +inductive TensorData.HasShape : (TensorData a) -> Shape -> Prop where +| HasShapeRoot xs n : List.length xs == n -> HasShape (TensorData.td_root xs) [n] +| HasShapeNil : HasShape (TensorData.td_root []) [0] +| HasShapeCons x xs s : HasShape x s + -> (∀ x', x' ∈ xs -> HasShape x' s) + -> HasShape (TensorData.td_node (.cons x xs)) (.cons (1 + List.length xs) s) + +theorem HashShapeOk : ∀ (x: TensorData a) (s: Shape), x.HasShape a s <-> x.hasShape a s := by + intro x1 s1 + constructor + . intro H + induction H with + | HasShapeRoot xs n H => + unfold TensorData.hasShape + exact H + | HasShapeNil => + unfold TensorData.hasShape + simp + | HasShapeCons x xs s H1 H2 H3 H4 => + clear x1 s1 + unfold TensorData.hasShape + simp [H3] + constructor + . linarith + . intros y H5 y' H6 H7 + apply H4 + rw[<- H7] + exact H6 + . revert s1 + induction x1 + + +-- cases x +-- unfold TensorData.hasShape + +def TensorData.getShape (x: TensorData a): Shape := + match x with + | .td_root y => [List.length y] + | .td_node ys => match ys with + | .nil => [0] + | .cons y _ => .cons (List.length ys) (getShape y) + +-- The `shape` is the dimensions of the tensor. +-- When you know the tensor is well-formed, you can +-- just use `getShape`, which only traverses the first +-- branch +def shapeOpt (x: TensorData a): Option Shape := + let s := x.getShape + if x.hasShape _ s then .some s else .none + +#eval shapeOpt _ (.td_root [1, 2, 3]) + +#eval shapeOpt _ ( + .td_node [ + (.td_root [1, 2, 3]), + (.td_root [1, 2, 3]) + ]) + +#eval shapeOpt _ ( + .td_node [ + (.td_root [1, 2, 3]), + (.td_root [1, 2]) + ]) + + +-- CPS version of flatten (we're experimenting here, after all) +def flattenAux {b} (x: TensorData a) (k: List a -> b): b := + match x with + | .td_root x => k x + | .td_node .nil => k [] + | .td_node (.cons y ys) => + flattenAux y (fun acc => flattenAux (.td_node ys) (fun acc' => k (List.append acc acc'))) + +-- Note that `flatten` does not need well-shaped tensors +-- Otherwise should behave like https://numpy.org/doc/2.0/reference/generated/numpy.ndarray.flatten.html +def flatten (x: TensorData a): List a := + @flattenAux a (List a) x id + +-- Mutually recursive version of flatten. This is not tail-recursive, but let's +-- consider it as an option if the proofs go through easier with it. +mutual + def flatten_mutual (x: TensorData a): List a := match x with + | .td_root ys => ys + | .td_node xs => flatten_list xs + + def flatten_list (xs: List (TensorData a)): List a := match xs with + | .nil => [] + | .cons d ds => List.append (flatten_mutual d) (flatten_list ds) +end + +#eval flatten _ ( + .td_root ([]: List Nat) +) + +#eval flatten _ ( + .td_root ([1, 2, 3]: List Nat) +) + +#eval flatten _ ( + TensorData.td_node [ + .td_root ([1, 2, 3]: List Nat), + .td_root ([4, 5, 6]: List Nat), + .td_root ([7, 8, 9]: List Nat), + ] +) + +#eval flatten _ ( + .td_node [ + .td_root ([1, 2, 3]: List Nat), + .td_node [ + .td_root ([1, 2, 3]: List Nat), + .td_root ([4, 5, 6]: List Nat), + .td_root ([7, 8, 9]: List Nat), + ], + .td_root ([1, 2, 3]: List Nat), + ] +) + +theorem flattens_same : ∀ x, flatten a x = flatten_mutual a x := by + intro x + cases x + . unfold flatten flattenAux flatten_mutual + simp + . + + -- unfold flatten_mutual flatten_list + + + +end TensorData +end TensorLib diff --git a/TensorLib/TensorElement.lean b/TensorLib/TensorElement.lean new file mode 100644 index 0000000..7dc5e3e --- /dev/null +++ b/TensorLib/TensorElement.lean @@ -0,0 +1,26 @@ +/- +Copyright (c) 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Authors: John Tristan, Paul Govereau, Sean McLaughlin +-/ + +namespace TensorLib + +-- Since one goal of TensorLib is to support ML frameworks and languages +-- like Triton and NKI, we will need to support a menagerie of data types. +-- Triton and NKI supports signed and unsigned ints (8/16/32/64) and +-- several floating point types and sizes (float, bfloat, tfloat). +class TensorElement (a: Type) [Add a][Sub a][Mul a][Neg a] /- etc. -/ where + -- Implement stuff here that's needed for numpy-like ops + +-- Abritrary precision +instance : TensorElement Int where + +-- 64 bit IEEE-754 floats +instance : TensorElement Float where + +-- Code for 2s compliment ints is here https://gist.github.com/ammkrn/79281ae3d3b301c99a84821c18dcb5f1 +-- TODO: Int8/16/32/64 + +-- TODO: Figure out unsigned ints +-- TODO: bfloat, tfloat diff --git a/lake-manifest.json b/lake-manifest.json index 3276d9a..efd0c86 100644 --- a/lake-manifest.json +++ b/lake-manifest.json @@ -1,5 +1,85 @@ {"version": "1.1.0", "packagesDir": ".lake/packages", - "packages": [], + "packages": + [{"url": "https://github.com/leanprover-community/batteries", + "type": "git", + "subDir": null, + "scope": "leanprover-community", + "rev": "c521f0185f4dd42b6aa4898010d5ba5357c57c9f", + "name": "batteries", + "manifestFile": "lake-manifest.json", + "inputRev": "main", + "inherited": true, + "configFile": "lakefile.lean"}, + {"url": "https://github.com/leanprover-community/quote4", + "type": "git", + "subDir": null, + "scope": "leanprover-community", + "rev": "1357f4f49450abb9dfd4783e38219f4ce84f9785", + "name": "Qq", + "manifestFile": "lake-manifest.json", + "inputRev": "master", + "inherited": true, + "configFile": "lakefile.lean"}, + {"url": "https://github.com/leanprover-community/aesop", + "type": "git", + "subDir": null, + "scope": "leanprover-community", + "rev": "b20a88676fd00affb90cbc9f1ff004ae588103b3", + "name": "aesop", + "manifestFile": "lake-manifest.json", + "inputRev": "master", + "inherited": true, + "configFile": "lakefile.toml"}, + {"url": "https://github.com/leanprover-community/ProofWidgets4", + "type": "git", + "subDir": null, + "scope": "leanprover-community", + "rev": "baa65c6339a56bd22b7292aa4511c54b3cc7a6af", + "name": "proofwidgets", + "manifestFile": "lake-manifest.json", + "inputRev": "v0.0.43", + "inherited": true, + "configFile": "lakefile.lean"}, + {"url": "https://github.com/leanprover/lean4-cli", + "type": "git", + "subDir": null, + "scope": "leanprover", + "rev": "2cf1030dc2ae6b3632c84a09350b675ef3e347d0", + "name": "Cli", + "manifestFile": "lake-manifest.json", + "inputRev": "main", + "inherited": true, + "configFile": "lakefile.toml"}, + {"url": "https://github.com/leanprover-community/import-graph", + "type": "git", + "subDir": null, + "scope": "leanprover-community", + "rev": "9b4088ccf0f44ddd7b1132bb1348aef8cf481e12", + "name": "importGraph", + "manifestFile": "lake-manifest.json", + "inputRev": "main", + "inherited": true, + "configFile": "lakefile.toml"}, + {"url": "https://github.com/leanprover-community/LeanSearchClient", + "type": "git", + "subDir": null, + "scope": "leanprover-community", + "rev": "7bedaed1ef024add1e171cc17706b012a9a37802", + "name": "LeanSearchClient", + "manifestFile": "lake-manifest.json", + "inputRev": "main", + "inherited": true, + "configFile": "lakefile.toml"}, + {"url": "https://github.com/leanprover-community/mathlib4", + "type": "git", + "subDir": null, + "scope": "", + "rev": "63c47e1d61626a0e8d3209728b4a86fc90b87cd7", + "name": "mathlib", + "manifestFile": "lake-manifest.json", + "inputRev": null, + "inherited": false, + "configFile": "lakefile.lean"}], "name": "TensorLib", "lakeDir": ".lake"} diff --git a/lakefile.lean b/lakefile.lean index bd5c4cf..8cd5067 100644 --- a/lakefile.lean +++ b/lakefile.lean @@ -10,3 +10,6 @@ lean_lib «TensorLib» where @[default_target] lean_exe "tensorlib" where root := `Main + +require mathlib from git + "https://github.com/leanprover-community/mathlib4" diff --git a/lean-toolchain b/lean-toolchain index 7f0ea50..eff86fd 100644 --- a/lean-toolchain +++ b/lean-toolchain @@ -1 +1 @@ -leanprover/lean4:v4.10.0 +leanprover/lean4:v4.13.0-rc3