test/cloudschema.proto at master · JuniusLuo/test

106 lines (94 loc) · 3.21 KB
// See README.txt for information and build instructions.
// Note: START and END tags are used in comments to define sections used in
// tutorials.  They are not part of the syntax for Protocol Buffers.
// To get an in-depth walkthrough of this file and the related examples, see:
// https://developers.google.com/protocol-buffers/docs/tutorials
// [START declaration]
// this schema is written in the proto3 syntax.
syntax = "proto3";
// the messages in this file are declared in the "test" package.
package test;
// [END declaration]
// [START messages]
// ObjectSMD is the system metadata of one object: its name, bucket, size,
// modification time and etag.
message ObjectSMD {
  // object name
  string name = 1;
  // bucket name; NOTE(review): presumably the bucket that holds the object - confirm
  string bucket = 2;
  // object size; NOTE(review): unit is presumably bytes - confirm
  int64 size = 3;
  // modification time; NOTE(review): the epoch unit (seconds/millis) is not
  // stated in this file - confirm against the writer
  int64 mtime = 4;
  // object etag; NOTE(review): presumably a content checksum - confirm
  string etag = 5;
}
// Acl currently declares no fields.
// NOTE(review): presumably a placeholder for the object's access control
// list - confirm before adding fields.
message Acl {
}
// UMD is one metadata entry: a key plus a string value and a "byte" value.
// NOTE(review): "UMD" presumably stands for user metadata, and presumably only
// one of strval/byteval is set per entry - confirm.
message UMD {
  // entry key
  string key = 1;
  // value of the entry as a string
  string strval = 2;
  // NOTE(review): the name suggests raw bytes, but a proto3 string must be
  // valid UTF-8. if arbitrary binary values are stored, a bytes field would be
  // the right type. kept as string here so generated code does not change.
  string byteval = 3;
}
// ObjectUMD is the list of UMD entries of one object.
message ObjectUMD {
  // the metadata entries; empty if the object has none
  repeated UMD umd = 1;
}
// md5 is 32 bytes. if data block is 128KB, 128MB object has 1k blocks, 32KB.
// 512MB object has 4k blocks, 128KB. 128GB object has 1m blocks, 32MB.
// 1TB object has 8m blocks, 256MB. 4TB object has 32m blocks, 1GB.
// So we need the ability to split the blocks.
// 1. split the meta object into multiple objects, such as /b1/k1.md.1, k1.md.2, etc.
//    this needs to handle the case where the meta object name already exists as a
//    real object name. For example, if k1.md is a real object name, the meta object
//    of k1.md will be k1.md.md.1.
// 2. keep a single meta object. The problem becomes how to deserialize.
//    we have to have a small meta gpb with fixed size. Then append the split blocks.
// 3. unify with MPU. We could split the blocks into parts. The first 64k blocks (2MB)
//    will be stored directly in the meta object with the other metadata.
// #3 is best.
// Just a reference: DD block size is variable and ends up being 4K-12K, another says 64KB?
message ObjectData {
  // data block size, such as 128KB
  // NOTE(review): the stored unit is presumably bytes - confirm
  int32 blockSize = 1;
  // a rough estimate of the number of dedup blocks. this may not be
  // accurate, as concurrent puts could arrive around the same time.
  // there could be more dedup blocks than this, but never fewer.
  int64 ddBlocks = 2;
  // max blocks per part
  int32 maxBlocks = 3;
  // to support large objects, blocks can be split into parts.
  // the first and last parts could have embedded blocks; to simplify the
  // read flow, the last part may not have any block.
  repeated DataPart dataParts = 4;
}
// DataPart is one part of an object's data blocks.
message DataPart {
  // part name, format: uuid.partNum
  string name = 1;
  // the reverse MD that lets a part find its object, nil if the DataPart is embedded
  DataPartMD md = 2;
  // the possibly embedded blocks
  // NOTE(review): each entry is presumably the md5 of one data block - confirm
  repeated string blocks = 3;
}
// DataPartMD names the bucket and object that a data part belongs to.
message DataPartMD {
  // bucket name
  string bucketName = 1;
  // object name
  string objectName = 2;
}
// ObjectMD groups all metadata of one object: the put uuid, the system
// metadata, the acl, the UMD entries and the data block information.
message ObjectMD {
  // uuid for the object put, to handle possible concurrent puts of the same object.
  string uuid = 1;
  // SMD is the minimal set of system metadata
  ObjectSMD smd = 2;
  // the object's Acl message
  Acl acl = 3;
  // the object's UMD entries
  ObjectUMD umd = 4;
  // the first DataBlock
  ObjectData data = 5;
}
// the positive and negative refs for one block.
// the ref is key name to allow inserting the same ref again.
// what if there is a huge number of refs to one block? assuming a key name is 512 bytes,
// a block referenced by 1k keys needs 512KB of refs, and by 1m keys 512MB.
// similar with data block, could have like md5.rr.1, md5.rr.2
message BlockRefs {
  // the positive refs, each ref is a key name
  repeated string pref = 1;
  // the negative refs, each ref is a key name
  repeated string nref = 2;
  // if there are more refs parts
  // NOTE(review): presumably the names of additional ref parts, like
  // md5.rr.1, md5.rr.2 - confirm
  repeated string refParts = 3;
}
// [END messages]
Provide feedback

Saved searches

Use saved searches to filter your results more quickly

FilesExpand file tree

cloudschema.proto

Latest commit

History

cloudschema.proto

File metadata and controls