// Copyright 2021 The Pigweed Authors
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy of
// the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations under
// the License.

syntax = "proto3";

package pw.log;

import "pw_tokenizer/proto/options.proto";

option java_outer_classname = "Log";

// A log message and metadata. Logs come in a few different forms:
//
//  1. A tokenized log message (recommended for production)
//  2. A non-tokenized log message (good for development)
//  3. A "log missed" tombstone, indicating that some logs were dropped
//
// Size analysis for tokenized log messages, including each field's proto tag:
//
//  - message     - 6-12 bytes, depending on number and value of arguments
//  - line_level  - 3 bytes; 4 bytes if line > 2048 (uncommon)
//  - timestamp   - 3 bytes; assuming delta encoding
//  - thread_name - 6 bytes
//
// Adding the fields gives the total proto message size:
//
//    6-12 bytes - log
//    9-15 bytes - log + level + line
//   12-18 bytes - log + level + line + timestamp
//
// An analysis of a project's log token database revealed the following
// distribution of the number of arguments to log messages:
//
//   # args   # messages
//     0         2,700
//     1         2,400
//     2         1,200
//     3+        1,000
//
// Note: The below proto makes some compromises compared to what one might
// expect for a "clean" proto design, in order to shave bytes off of the
// messages. It is critical that the log messages are as small as possible to
// enable storing more logs in limited memory. This is why, for example, there
// is no separate "DroppedLog" type, or a "TokenizedLog" and "StringLog", which
// would add at least 2 extra bytes per message
message LogEntry {
  // The log message, which may be tokenized.
  //
  // If tokenized logging is used, implementations may encode metadata in the
  // log message rather than as separate proto fields. This reduces the size of
  // the protobuf with no overhead.
  //
  // The standard format for encoding metadata in the log message is defined by
  // the pw_log_tokenized module. The message and metadata are encoded as
  // key-value pairs using ■ and ♦ as delimiters. For example:
  //
  //  ■msg♦This is the log message: %d■module♦wifi■file♦../path/to/file.cc
  //
  // See http://pigweed.dev/pw_log_tokenized for full details. When
  // pw_log_tokenized is used, this metadata is automatically included as
  // described.
  //
  // The level and flags are not included since they may be runtime values and
  // thus cannot always be tokenized. The line number is not included because
  // line numbers change frequently and a new token is created for each line.
  //
  // Size analysis when tokenized:
  //
  //   tag+wire = 1 byte
  //   size     = 1 byte; payload will almost always be < 127 bytes
  //   payload  = N bytes; typically 4-10 in practice
  //
  // Total: 2 + N ~= 6-12 bytes
  optional bytes message = 1 [(tokenizer.format) = TOKENIZATION_OPTIONAL];

  // Packed log level and line number. Structure:
  //
  //   Level: Bottom 3 bits; level = line_level & 0x7
  //   Line: Remaining bits; line = (line_level >> 3)
  //
  // Note: This packing saves two bytes per log message in most cases compared
  // to having line and level separately; and is zero-cost if the log backend
  // omits the line number.
  optional uint32 line_level = 2;

  // Some log messages have flags to indicate attributes such as whether they
  // are from an assert or if they contain PII. The particular flags are
  // product- and implementation-dependent.
  optional uint32 flags = 3;

  // Timestamps are either specified with an absolute timestamp or relative to
  // the previous log entry.
  oneof time {
    // The absolute timestamp in implementation-defined ticks. Applications
    // determine how to interpret this on the receiving end. In the simplest
    // case, these ticks might be milliseconds or microseconds since boot.
    // Applications could also access clock information out-of-band with a
    // ClockParameters protobuf.
    int64 timestamp = 4;

    // Time since the last entry in implementation-defined ticks, as for the
    // timestamp field. This enables delta encoding when batching entries
    // together.
    //
    // Size analysis for this field including tag and varint, assuming 1 kHz
    // ticks:
    //
    //           < 127 ms gap == 127 ms      ==  7 bits == 2 bytes
    //        < 16,000 ms gap ==  16 seconds == 14 bits == 3 bytes
    //     < 2,000,000 ms gap ==  35 minutes == 21 bits == 4 bytes
    //   < 300,000,000 ms gap ==  74 hours   == 28 bits == 5 bytes
    //
    // Log bursts will thus consume just 2 bytes (tag + up to 127ms delta) for
    // the timestamp, which is a good improvement over an absolute timestamp.
    int64 time_since_last_entry = 5;
  }

  // The following fields are planned but will not be added until they are
  // needed. Protobuf field numbers over 15 use an extra byte, so these fields
  // are left out for now to avoid reserving field numbers unnecessarily.

  // When the log buffers are full but more logs come in, the logs are counted
  // and a special log message is omitted with only counts for the number of
  // messages dropped. The timestamp indicates the time that the "missed logs"
  // message was inserted into the queue.
  //
  // As an alternative to these fields, implementations may simply send a
  // message stating the drop count.
  // optional uint32 dropped = ?;
  // optional uint32 dropped_warning_or_above = ?;

  // Represents the device from which the log originated. The meaning of this
  // field is implementation defined
  // optional uint32 source_id = ?;

  // The task or thread name that created the log message. If the log was not
  // created on a thread, it should use a name appropriate to that context.
  // optional bytes thread_name = ?
  //     [(tokenizer.format) = TOKENIZATION_OPTIONAL];

  // The file path where this log was created, if not encoded in the message.
  // optional bytes file = ? [(tokenizer.format) = TOKENIZATION_OPTIONAL];

  // The PW_LOG_MODULE_NAME for this log message, if it is not encoded in the
  // message.
  // optional bytes module_name = ?
  //    [(tokenizer.format) = TOKENIZATION_OPTIONAL];

  // Some messages are associated with trace events, which may carry additional
  // contextual data. This is a tuple of a data format string which could be
  // used by the decoder to identify the data (e.g. printf-style tokens) and the
  // data itself in bytes.
  // optional bytes data_format = ?
  //     [(tokenizer.format) = TOKENIZATION_OPTIONAL];
  // optional bytes data = ?;
}

message LogRequest {
  // This will include fields for configuring log filtering.
}

message LogEntries {
  repeated LogEntry entries = 1;
}

// RPC service for accessing logs.
service Logs {
  rpc Listen(LogRequest) returns (stream LogEntries);
}