Skip to main content

Python SDK

The OpenMetadata Python SDK provides a comprehensive interface for interacting with the OpenMetadata API. It offers type-safe operations for managing metadata entities and seamless integration with your Python applications.

Installation

Install the OpenMetadata Python SDK using pip:
pip install openmetadata-ingestion

Quick Start

Basic Connection

from metadata.generated.schema.entity.services.connections.metadata.openMetadataConnection import (
    OpenMetadataConnection,
)
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.generated.schema.security.client.openMetadataJWTClientConfig import (
    OpenMetadataJWTClientConfig,
)

# Configure connection
server_config = OpenMetadataConnection(
    hostPort="http://localhost:8585/api",
    authProvider="openmetadata",
    securityConfig=OpenMetadataJWTClientConfig(
        jwtToken="your-jwt-token"
    ),
)

# Create OpenMetadata client
metadata = OpenMetadata(server_config)

# Health check
if metadata.health_check():
    print("Successfully connected to OpenMetadata!")

Working with Entities

from metadata.generated.schema.entity.data.table import Table
from metadata.generated.schema.entity.services.databaseService import DatabaseService

# Get all database services
services = list(metadata.list_all_entities(entity=DatabaseService))
print(f"Found {len(services)} database services")

# Get a specific table by name
table = metadata.get_by_name(
    entity=Table,
    fqn="your-service.your-database.your-schema.your-table"
)

if table:
    print(f"Table: {table.name}")
    print(f"Columns: {len(table.columns) if table.columns else 0}")

Core Functionality

Entity Management

The Python SDK provides full CRUD operations for all OpenMetadata entities:

Create or Update Entities

from metadata.generated.schema.api.data.createTable import CreateTableRequest

# Create table request
create_table = CreateTableRequest(
    name="sample_table",
    databaseSchema="your_service.your_database.your_schema",  # fully qualified schema name
    columns=[
        # Define your columns here
    ],
    description="Sample table created via Python SDK"
)

# Create or update the table
table = metadata.create_or_update(data=create_table)

Retrieve Entities

# Get by ID
table = metadata.get_by_id(entity=Table, entity_id="uuid-here")

# Get by fully qualified name
table = metadata.get_by_name(
    entity=Table,
    fqn="service.database.schema.table"
)

# List entities with pagination
tables = metadata.list_entities(
    entity=Table,
    fields=["owner", "tags"],
    limit=50
)

List All Entities

# Generator for large datasets
all_tables = metadata.list_all_entities(
    entity=Table,
    fields=["owner", "columns"],
    params={"service": "your-service-name"}
)

for table in all_tables:
    print(f"Processing table: {table.name}")

Entity References

# Get entity reference for relationships
table_ref = metadata.get_entity_reference(
    entity=Table,
    fqn="service.database.schema.table"
)

# Use in other entity creation
if table_ref:
    print(f"Table reference: {table_ref.id}")

Advanced Features

Custom Configuration

# Using custom authentication
from metadata.generated.schema.security.client.basicAuthenticationProvider import (
    BasicAuthenticationProvider,
)

server_config = OpenMetadataConnection(
    hostPort="https://your-openmetadata-instance.com/api",
    authProvider="basic",
    securityConfig=BasicAuthenticationProvider(
        username="your-username",
        password="your-password"
    ),
)

metadata = OpenMetadata(server_config)

Error Handling

from metadata.ingestion.ometa.ometa_api import (
    OpenMetadata,
    InvalidEntityException,
    EmptyPayloadException
)

try:
    table = metadata.get_by_name(entity=Table, fqn="non-existent-table")
except InvalidEntityException as e:
    print(f"Invalid entity: {e}")
except EmptyPayloadException as e:
    print(f"No data received: {e}")

Working with Versions

# Get entity version history
versions = metadata.get_list_entity_versions(
    entity_id="your-entity-id",
    entity=Table
)

print(f"Found {len(versions.versions)} versions")

Common Use Cases

Data Discovery

# Search for tables containing specific keywords
all_tables = metadata.list_all_entities(entity=Table)

matching_tables = [
    table for table in all_tables
    if "customer" in table.name.lower()
]

for table in matching_tables:
    print(f"Found customer table: {table.fullyQualifiedName}")

Metadata Automation

# Bulk update table descriptions
tables_to_update = metadata.list_all_entities(
    entity=Table,
    params={"service": "production-db"}
)

for table in tables_to_update:
    if not table.description:
        # create_or_update() only accepts Create*Request objects, so patch
        # the description of the fetched entity instead
        metadata.patch_description(
            entity=Table,
            source=table,
            description=f"Production table: {table.name}",
        )

Lineage Management

from metadata.generated.schema.type.entityLineage import EntityLineage

# Get table lineage
lineage = metadata.get_lineage_by_name(
    entity=Table,
    fqn="service.database.schema.table"
)

if lineage:
    # get_lineage_by_name returns the raw API response as a dict
    print(f"Upstream entities: {len(lineage.get('upstreamEdges') or [])}")
    print(f"Downstream entities: {len(lineage.get('downstreamEdges') or [])}")

API Reference

The Python SDK provides a comprehensive API based on the OpenMetadata data model:

Core Classes

  • OpenMetadata: Main client class for API interactions
  • OpenMetadataConnection: Connection configuration
  • Entity Classes: Type-safe representations of all OpenMetadata entities

Key Methods

  • create_or_update(data): Create or update an entity
  • get_by_name(entity, fqn): Retrieve entity by fully qualified name
  • get_by_id(entity, entity_id): Retrieve entity by ID
  • list_entities(entity, **kwargs): List entities with pagination
  • list_all_entities(entity, **kwargs): Generator for all entities
  • delete(entity, entity_id): Delete an entity
  • health_check(): Check API connectivity

Type Safety

The Python SDK is built on generated Pydantic models, providing:
  • Type hints for better IDE support
  • Runtime validation of data structures
  • Auto-completion for entity properties
  • Error prevention through static typing

# Type-safe entity creation
from metadata.generated.schema.entity.data.table import Table

# IDE will provide auto-completion and type checking
table: Table = metadata.get_by_name(entity=Table, fqn="my.table")
if table:
    columns_count: int = len(table.columns) if table.columns else 0

Best Practices

  1. Connection Management: Reuse OpenMetadata client instances
  2. Error Handling: Always handle potential exceptions
  3. Pagination: Use list_all_entities() for large datasets
  4. Performance: Specify only required fields when fetching entities
  5. Resource Cleanup: Call metadata.close() when done