OpenTelemetry Tracing
Panduan lengkap implementasi OpenTelemetry (OTEL) untuk observability di MStore Backend.
🎯 Overview
MStore Backend menggunakan OpenTelemetry untuk:
Distributed Tracing - Track request flow across services
Structured Logging - Contextual logs dengan correlation ID
Metrics Collection - Performance metrics & monitoring
Package : pkg/utils/logger/otel.go
📊 Architecture
OTEL Stack
🚀 Quick Start
1. Initialize OTEL
package main
import (
" context "
utils_logger " gitlab.com/mushola-store/mstore_backend/pkg/utils/logger "
)
// main initializes OpenTelemetry before any application code runs and defers
// the shutdown function returned by InitOpenTelemetry until main returns.
func main() {
	ctx := context.Background()

	// Arguments after ctx: service name, version, environment, OTLP endpoint.
	shutdown, err := utils_logger.InitOpenTelemetry(ctx, "pos-api", "1.0.0", "production", "localhost:4317")
	if err != nil {
		panic(err)
	}
	defer shutdown(ctx)

	// Your application code
}
🎨 Trace Layers
Layer Types
MStore menggunakan 4 layer standar untuk tracing:
// Standard trace layers. Each value is passed as the layer argument when a
// span is started, so every span is tagged with the application layer that
// created it.
const (
LayerHandler TraceLayer = "handler" // HTTP/gRPC handlers
LayerService TraceLayer = "service" // Business logic
LayerRepository TraceLayer = "repository" // Database operations
LayerExternal TraceLayer = "external" // External API calls
)
📝 Usage Examples
Handler Layer
package transaction_handler
import (
	"errors"

	"github.com/gofiber/fiber/v2"
	utils_logger "gitlab.com/mushola-store/mstore_backend/pkg/utils/logger"
)
// CreateTransaction handles the create-transaction HTTP request.
//
// It starts a handler-layer span, records one trace step per stage (parse,
// validate, call service) and, on every failure path, records the error as a
// step and ends the span with that error via EndLayerSpan. On success it adds
// a "transaction_created" event and writes the result through
// utils_response.Result.
func (h *TransactionHandler) CreateTransaction(ctx *fiber.Ctx) error {
	// Start handler span; fctx is the context handed down to the service layer.
	fctx, span := utils_logger.StartLayerSpanFromFiber(
		ctx,
		utils_logger.LayerHandler,
		"CreateTransaction",
	)
	// Ends the span on the success path; error paths end it explicitly below.
	defer utils_logger.EndLayerSpan(span, nil)

	// Step 1: Parse body
	utils_logger.TraceStep(fctx, "parse_request_body")
	var payload PayloadCreateTransaction
	if err := ctx.BodyParser(&payload); err != nil {
		utils_logger.TraceStepErr(fctx, "parse_body_failed", err)
		return utils_logger.EndLayerSpan(span, err)
	}

	// Step 2: Validate
	utils_logger.TraceStep(fctx, "validate_payload")
	if payload.BranchCode == "" {
		err := errors.New("branch_code required")
		utils_logger.TraceStepErr(fctx, "validation_failed", err)
		return utils_logger.EndLayerSpan(span, err)
	}

	// Step 3: Call service
	utils_logger.TraceStep(fctx, "call_service")
	result, err := h.transactionService.CreateTransaction(fctx, payload)
	if err != nil {
		// Record the failure as a step too, so every error path in this
		// handler is traced the same way.
		utils_logger.TraceStepErr(fctx, "service_call_failed", err)
		return utils_logger.EndLayerSpan(span, err)
	}

	// Success
	utils_logger.AddStepEvent(fctx, "transaction_created", map[string]interface{}{
		"transaction_id": result.ID,
		"grand_total":    result.GrandTotal,
	})
	return utils_response.Result(ctx, result, nil)
}
Service Layer
package transaction_service
import (
" context "
utils_logger " gitlab.com/mushola-store/mstore_backend/pkg/utils/logger "
)
// CreateTransaction builds a Transaction from payload and persists it.
//
// It runs inside a service-layer span and records one trace step per stage:
// looking up the branch by payload.BranchCode, computing the totals, and
// saving through the transaction repository. Every failure is recorded as a
// step error and the span is ended with that error via EndLayerSpan.
//
// Returns the created transaction, or nil and the error from the failing stage.
func (s *TransactionService) CreateTransaction(
	ctx context.Context,
	payload PayloadCreateTransaction,
) (*Transaction, error) {
	// Start service span
	ctx, span := utils_logger.StartLayerSpan(
		ctx,
		utils_logger.LayerService,
		"CreateTransaction",
	)
	// Ends the span on the success path; error paths end it explicitly below.
	defer utils_logger.EndLayerSpan(span, nil)

	// Step 1: Get branch
	utils_logger.TraceStep(ctx, "get_branch")
	branch, err := s.branchRepo.GetByCode(ctx, payload.BranchCode)
	if err != nil {
		utils_logger.TraceStepErr(ctx, "branch_not_found", err)
		return nil, utils_logger.EndLayerSpan(span, err)
	}

	// Step 2: Calculate totals
	utils_logger.TraceStep(ctx, "calculate_totals")
	subtotal := calculateSubtotal(payload.Items)
	grandTotal := subtotal - payload.DiscountTotal + payload.TaxTotal
	utils_logger.AddStepEvent(ctx, "totals_calculated", map[string]interface{}{
		"subtotal":    subtotal,
		"grand_total": grandTotal,
	})

	// Step 3: Save transaction
	utils_logger.TraceStep(ctx, "save_transaction")
	transaction := &Transaction{
		BranchID:   branch.ID,
		Subtotal:   subtotal,
		GrandTotal: grandTotal,
	}
	if err := s.transactionRepo.Create(ctx, transaction); err != nil {
		// Record the failure as a step, matching the branch-lookup error path.
		utils_logger.TraceStepErr(ctx, "save_transaction_failed", err)
		return nil, utils_logger.EndLayerSpan(span, err)
	}

	return transaction, nil
}
Repository Layer
package transaction_repository
import (
" context "
" gorm.io/gorm "
utils_logger " gitlab.com/mushola-store/mstore_backend/pkg/utils/logger "
)
// Create inserts transaction through the repository's db handle inside a
// repository-layer span.
//
// A "db_insert" event is added before the query. On failure the error is
// recorded as a step and the span is ended with it; on success a
// "db_insert_success" event carrying the transaction ID is added.
func (r *TransactionRepository) Create(ctx context.Context, transaction *Transaction) error {
	ctx, span := utils_logger.StartLayerSpan(ctx, utils_logger.LayerRepository, "CreateTransaction")
	defer utils_logger.EndLayerSpan(span, nil)

	// Describe the database operation about to run.
	insertAttrs := map[string]interface{}{
		"table":     "transactions",
		"operation": "INSERT",
	}
	utils_logger.AddStepEvent(ctx, "db_insert", insertAttrs)

	// Run the insert with the span-carrying context.
	insertErr := r.db.WithContext(ctx).Create(transaction).Error
	if insertErr != nil {
		utils_logger.TraceStepErr(ctx, "db_insert_failed", insertErr)
		return utils_logger.EndLayerSpan(span, insertErr)
	}

	successAttrs := map[string]interface{}{
		"transaction_id": transaction.ID,
	}
	utils_logger.AddStepEvent(ctx, "db_insert_success", successAttrs)
	return nil
}
External API Layer
package xendit_service
import (
" context "
" net/http "
utils_logger " gitlab.com/mushola-store/mstore_backend/pkg/utils/logger "
)
// CreateQRIS requests a QRIS code for amount by POSTing to "/qr_codes"
// through the service's HTTP client, inside an external-layer span.
//
// An "external_api_call" event is added before the request. On failure the
// error is recorded as a step and the span is ended with it; on success an
// "api_call_success" event with the response status code and QR ID is added.
func (s *XenditService) CreateQRIS(
	ctx context.Context,
	amount float64,
) (*QRISResponse, error) {
	// Start external span
	ctx, span := utils_logger.StartLayerSpan(
		ctx,
		utils_logger.LayerExternal,
		"Xendit.CreateQRIS",
	)
	// Ends the span on the success path; the error path ends it explicitly below.
	defer utils_logger.EndLayerSpan(span, nil)

	// Add external service attributes
	utils_logger.AddStepEvent(ctx, "external_api_call", map[string]interface{}{
		"service":  "xendit",
		"endpoint": "/qr_codes",
		"method":   http.MethodPost,
	})

	// Build the request body from the amount argument.
	// NOTE(review): only amount is available here; confirm which other fields
	// the endpoint requires and the payload type s.httpClient.Post expects.
	payload := map[string]interface{}{
		"amount": amount,
	}

	// Make HTTP request
	resp, err := s.httpClient.Post(ctx, "/qr_codes", payload)
	if err != nil {
		utils_logger.TraceStepErr(ctx, "api_call_failed", err)
		return nil, utils_logger.EndLayerSpan(span, err)
	}

	utils_logger.AddStepEvent(ctx, "api_call_success", map[string]interface{}{
		"status_code": resp.StatusCode,
		"qr_id":       resp.ID,
	})
	return resp, nil
}
🔧 Helper Functions
StartLayerSpan
Memulai span baru dengan layer context.
func StartLayerSpan (
ctx context . Context ,
layer TraceLayer ,
operationName string ,
) ( context . Context , oteltrace . Span )
Example :
ctx , span := utils_logger . StartLayerSpan (
ctx ,
utils_logger . LayerService ,
"ProcessPayment" ,
)
defer utils_logger . EndLayerSpan ( span , nil )
StartLayerSpanFromFiber
Memulai span dari Fiber context (untuk handlers).
func StartLayerSpanFromFiber (
fctx * fiber . Ctx ,
layer TraceLayer ,
operationName string ,
) ( context . Context , oteltrace . Span )
Example :
ctx , span := utils_logger . StartLayerSpanFromFiber (
fiberCtx ,
utils_logger . LayerHandler ,
"GetUserProfile" ,
)
defer utils_logger . EndLayerSpan ( span , nil )
TraceStep
Mencatat step dalam span (tanpa error).
func TraceStep ( ctx context . Context , stepName string )
Example :
utils_logger . TraceStep ( ctx , "validate_input" )
utils_logger . TraceStep ( ctx , "fetch_user_data" )
utils_logger . TraceStep ( ctx , "calculate_total" )
TraceStepErr
Mencatat step dengan error.
func TraceStepErr ( ctx context . Context , stepName string , err error )
Example :
if err := validatePayload ( payload ); err != nil {
utils_logger . TraceStepErr ( ctx , "validation_failed" , err )
return err
}
AddStepEvent
Menambahkan event dengan attributes ke span.
func AddStepEvent (
ctx context . Context ,
eventName string ,
attrs map [ string ] interface {},
)
Example :
utils_logger . AddStepEvent ( ctx , "payment_processed" , map [ string ] interface {}{
"payment_id" : payment . ID ,
"amount" : payment . Amount ,
"method" : payment . Method ,
"status" : "success" ,
})
EndLayerSpan
Mengakhiri span dengan error handling.
func EndLayerSpan ( span oteltrace . Span , err error ) error
Example :
defer utils_logger . EndLayerSpan ( span , nil )
// Or with error
if err != nil {
return utils_logger . EndLayerSpan ( span , err )
}
🎯 Best Practices
1. Always Defer EndLayerSpan
// ✅ GOOD
ctx , span := utils_logger . StartLayerSpan ( ctx , layer , "Operation" )
defer utils_logger . EndLayerSpan ( span , nil )
// ❌ BAD - Span tidak akan ditutup jika panic
ctx , span := utils_logger . StartLayerSpan ( ctx , layer , "Operation" )
// ... code ...
utils_logger . EndLayerSpan ( span , nil )
2. Use Descriptive Operation Names
// ✅ GOOD
StartLayerSpan ( ctx , LayerService , "CreateTransactionWithPayment" )
StartLayerSpan ( ctx , LayerRepository , "GetUserByEmail" )
// ❌ BAD
StartLayerSpan ( ctx , LayerService , "Process" )
StartLayerSpan ( ctx , LayerRepository , "Get" )
3. Add Meaningful Attributes
// ✅ GOOD
utils_logger . AddStepEvent ( ctx , "order_created" , map [ string ] interface {}{
"order_id" : order . ID ,
"customer_id" : order . CustomerID ,
"total_amount" : order . TotalAmount ,
"item_count" : len ( order . Items ),
})
// ❌ BAD
utils_logger . AddStepEvent ( ctx , "done" , map [ string ] interface {}{
"status" : "ok" ,
})
4. Trace Important Steps
// ✅ GOOD - Trace key steps
utils_logger . TraceStep ( ctx , "validate_input" )
utils_logger . TraceStep ( ctx , "check_inventory" )
utils_logger . TraceStep ( ctx , "reserve_stock" )
utils_logger . TraceStep ( ctx , "create_transaction" )
utils_logger . TraceStep ( ctx , "process_payment" )
// ❌ BAD - Too granular
utils_logger . TraceStep ( ctx , "line_1" )
utils_logger . TraceStep ( ctx , "line_2" )
utils_logger . TraceStep ( ctx , "line_3" )
5. Always Log Errors
// ✅ GOOD
if err := service . Process ( ctx , data ); err != nil {
utils_logger . TraceStepErr ( ctx , "process_failed" , err )
return utils_logger . EndLayerSpan ( span , err )
}
// ❌ BAD - Error tidak tercatat
if err := service . Process ( ctx , data ); err != nil {
return err
}
📊 Viewing Traces
Grafana Tempo
Access : http://localhost:3000/explore
Query Examples :
# Find traces by service
{resource.service.name="pos-api"}
# Find traces by operation
{name="CreateTransaction"}
# Find traces with errors
{status=error}
# Find traces by duration
{duration > 1s}
# Find traces by attribute
{.transaction.id="TRX-123"}
Trace Structure
Span: CreateTransaction (handler)
├── Step: parse_request_body
├── Step: validate_payload
├── Span: CreateTransaction (service)
│ ├── Step: get_branch
│ ├── Step: calculate_totals
│ ├── Span: CreateTransaction (repository)
│ │ ├── Event: db_insert
│ │ └── Event: db_insert_success
│ └── Span: Xendit.CreateQRIS (external)
│ ├── Event: external_api_call
│ └── Event: api_call_success
└── Event: transaction_created
🔍 Troubleshooting
Traces Not Appearing
Check :
OTLP endpoint accessible
Service name configured
Spans properly closed
No network issues
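# Test OTLP endpoint (4317 is the OTLP/gRPC port: a successful TCP connect is enough, a valid HTTP response is not expected)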
curl -v http://localhost:4317
# Check logs
tail -f logs/app.log | grep -i otel
High Cardinality Attributes
Problem : Too many unique attribute values
Solution : Use bounded values
// ✅ GOOD
attrs [ "status" ] = "success" // bounded: success|failed|pending
// ❌ BAD
attrs [ "user_id" ] = userID // unbounded: millions of values
Missing Context
Problem : Context not propagated
Solution : Always pass context
// ✅ GOOD
func ProcessOrder ( ctx context . Context , order Order ) error {
ctx , span := utils_logger . StartLayerSpan ( ctx , LayerService , "ProcessOrder" )
defer utils_logger . EndLayerSpan ( span , nil )
// Pass context to next layer
return repo . SaveOrder ( ctx , order )
}
// ❌ BAD
func ProcessOrder ( order Order ) error {
// No context - traces disconnected
return repo . SaveOrder ( order )
}
🎨 Configuration
Environment Variables
# OTLP Endpoint
OTEL_EXPORTER_OTLP_ENDPOINT = localhost:4317
# Service Info
OTEL_SERVICE_NAME = pos-api
OTEL_SERVICE_VERSION = 1.0.0
OTEL_DEPLOYMENT_ENVIRONMENT = production
# Sampling
OTEL_TRACES_SAMPLER = parentbased_traceidratio
OTEL_TRACES_SAMPLER_ARG = 0.1 # 10% sampling
# Enable/Disable
OTEL_SDK_DISABLED = false
Sampling Strategy
Development : 100% sampling
sampler := sdktrace . AlwaysSample ()
Production : Parent-based ratio sampling (10% of new root traces; child spans follow the parent's decision)
sampler := sdktrace . ParentBased (
sdktrace . TraceIDRatioBased ( 0.1 ), // 10%
)
System Design Overall system architecture
Observability (LGTM) LGTM stack overview
Grafana Tempo Tempo traces documentation
Error Handling Error handling best practices
OpenTelemetry : Tracing diaktifkan secara default di development & production. Gunakan EnableTraceSteps = false untuk disable step events.