// cpp/include/cornell/cas_client.hpp
  1  // ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
  2  //                                                     // cornell // cas_client
  3  // ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
  4  //
  5  // C++ gRPC client for Remote Execution API CAS (Content Addressable Storage)
  6  // Mirrors the Haskell NativeLink.Client API
  7  
  8  #pragma once
  9  
#include <cstddef>
#include <cstdint>
#include <memory>
#include <optional>
#include <string>
#include <string_view>
#include <utility>
#include <vector>

#include <grpcpp/grpcpp.h>

#include "remote_execution.grpc.pb.h"
#include "bytestream.grpc.pb.h"
 21  
 22  namespace cornell {
 23  
 24  // ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
 25  // Digest
 26  // ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
 27  
 28  struct Digest {
 29      std::string hash;  // SHA256 hex string (64 chars)
 30      int64_t size;
 31  
 32      bool operator==(const Digest& other) const {
 33          return hash == other.hash && size == other.size;
 34      }
 35  
 36      // Convert to resource name for ByteStream API
 37      std::string toResourceName(std::string_view instanceName) const {
 38          return std::string(instanceName) + "/blobs/" + hash + "/" + std::to_string(size);
 39      }
 40  
 41      // Convert to upload resource name
 42      std::string toUploadResourceName(std::string_view instanceName,
 43                                        std::string_view uuid = "cornell") const {
 44          return std::string(instanceName) + "/uploads/" + std::string(uuid)
 45               + "/blobs/" + hash + "/" + std::to_string(size);
 46      }
 47  };
 48  
 49  // Create digest from raw bytes (computes SHA256)
 50  Digest digestFromBytes(std::string_view data);
 51  
 52  // Create digest from known hash and size
 53  inline Digest digestFromHash(std::string hash, int64_t size) {
 54      return Digest{std::move(hash), size};
 55  }
 56  
 57  // Compute SHA256 hash of data as hex string
 58  std::string hashBytes(std::string_view data);
 59  
 60  // ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
 61  // Configuration
 62  // ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
 63  
 64  struct CASConfig {
 65      std::string host = "localhost";
 66      uint16_t port = 50051;
 67      bool useTLS = false;
 68      std::string instanceName;
 69  };
 70  
 71  // ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
 72  // CAS Client
 73  // ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
 74  
 75  class CASClient {
 76  public:
 77      explicit CASClient(const CASConfig& config);
 78      ~CASClient() = default;
 79  
 80      // Non-copyable, movable
 81      CASClient(const CASClient&) = delete;
 82      CASClient& operator=(const CASClient&) = delete;
 83      CASClient(CASClient&&) = default;
 84      CASClient& operator=(CASClient&&) = default;
 85  
 86      // ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
 87      // Upload Operations
 88      // ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
 89  
 90      // Upload blob to CAS, returns digest
 91      // Uses BatchUpdateBlobs for small blobs (<4MB), ByteStream.Write for large
 92      Digest uploadBlob(std::string_view data);
 93  
 94      // Batch upload multiple blobs (uses BatchUpdateBlobs)
 95      std::vector<Digest> uploadBlobs(const std::vector<std::string_view>& blobs);
 96  
 97      // ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
 98      // Download Operations
 99      // ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
100  
101      // Download blob from CAS
102      std::optional<std::string> downloadBlob(const Digest& digest);
103  
104      // Batch download multiple blobs
105      std::vector<std::optional<std::string>> downloadBlobs(const std::vector<Digest>& digests);
106  
107      // ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
108      // Query Operations
109      // ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
110  
111      // Check if a blob exists in CAS
112      bool blobExists(const Digest& digest);
113  
114      // Find which blobs are missing from a list
115      std::vector<Digest> findMissingBlobs(const std::vector<Digest>& digests);
116  
117      // ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
118      // Connection
119      // ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
120  
121      // Check if connection is healthy
122      bool isConnected() const;
123  
124      const CASConfig& config() const { return config_; }
125  
126  private:
127      static constexpr size_t BATCH_UPLOAD_THRESHOLD = 4 * 1024 * 1024;  // 4MB
128      static constexpr size_t STREAM_CHUNK_SIZE = 1024 * 1024;           // 1MB
129  
130      CASConfig config_;
131      std::shared_ptr<grpc::Channel> channel_;
132      std::unique_ptr<build::bazel::remote::execution::v2::ContentAddressableStorage::Stub> casStub_;
133      std::unique_ptr<google::bytestream::ByteStream::Stub> byteStreamStub_;
134  
135      // Internal upload methods
136      void batchUpload(const Digest& digest, std::string_view data);
137      void streamUpload(const Digest& digest, std::string_view data);
138  
139      // Convert between our Digest and proto Digest
140      static build::bazel::remote::execution::v2::Digest toProtoDigest(const Digest& d);
141      static Digest fromProtoDigest(const build::bazel::remote::execution::v2::Digest& pd);
142  };
143  
144  }  // namespace cornell