> ## Documentation Index > Fetch the complete documentation index at: https://docs.open-metadata.org/llms.txt > Use this file to discover all available pages before exploring further. # Run dbt Workflow Externally | OpenMetadata Guide > Run dbt workflows externally to sync metadata from CI/CD systems or scheduled orchestrations. export const CodePanel = ({children, fileName = 'config.yaml', showLineNumbers = false}) => { const codePanelRef = useRef(null); const codeContentRef = useRef(null); const isProgrammaticScroll = useRef(false); const hoverTimeout = useRef(null); useEffect(() => { let tries = 0; const wrapLines = () => { const root = codeContentRef.current; if (!root) return; const pres = Array.from(root.querySelectorAll('pre')); if (!pres.length) { if (tries++ < 20) requestAnimationFrame(wrapLines); return; } let globalLine = 1; pres.forEach(pre => { const code = pre.querySelector('code') || pre; if (!code || code.dataset.wrapped === 'true') return; const raw = code.textContent || ''; let lines = raw.split('\n'); while (lines[0] === '') lines.shift(); while (lines[lines.length - 1] === '') lines.pop(); code.innerHTML = lines.map(line => { const ln = globalLine++; const num = showLineNumbers ? 
`${ln}` : ''; const safe = line.replace(//g, '>') || ' '; return `${num}${safe}`; }).join(''); code.dataset.wrapped = 'true'; }); }; wrapLines(); }, [children, showLineNumbers]); useEffect(() => { const panel = codePanelRef.current; const content = codeContentRef.current; if (!panel || !content) return; const waitForLines = () => { const codeLines = content.querySelectorAll('.code-line'); if (!codeLines.length) { requestAnimationFrame(waitForLines); return; } setupHighlighting(codeLines); }; const setupHighlighting = codeLines => { const layout = panel.closest('.split-layout'); const sections = layout.querySelectorAll('.content-section'); const parseLines = str => { if (!str) return []; const out = []; str.split(',').forEach(p => { if (p.includes('-')) { const [s, e] = p.split('-').map(Number); for (let i = s; i <= e; i++) out.push(i); } else { const n = Number(p); if (!isNaN(n)) out.push(n); } }); return out; }; const clearHighlight = () => { codeLines.forEach(l => l.classList.remove('highlighted')); }; const highlight = lines => { clearHighlight(); lines.forEach(n => { const el = content.querySelector(`.code-line[data-line="${n}"]`); if (el) el.classList.add('highlighted'); }); }; const scrollToLines = lines => { if (!lines.length) return; const first = lines[0]; const targetLine = lines.length > 1 ? 
first : lines[0]; const el = content.querySelector(`.code-line[data-line="${targetLine}"]`); if (!el) return; isProgrammaticScroll.current = true; const containerRect = content.getBoundingClientRect(); const elRect = el.getBoundingClientRect(); const offset = elRect.top - containerRect.top + content.scrollTop; const TOP_PADDING = 16; content.scrollTo({ top: Math.max(offset - TOP_PADDING, 0), behavior: 'smooth' }); setTimeout(() => { isProgrammaticScroll.current = false; }, 200); }; const activate = (section, scroll) => { if (section.classList.contains('active')) return; sections.forEach(s => s.classList.remove('active')); section.classList.add('active'); const lines = parseLines(section.dataset.lines); highlight(lines); if (scroll) scrollToLines(lines); }; const observer = new IntersectionObserver(entries => { if (isProgrammaticScroll.current) return; entries.forEach(e => { if (e.isIntersecting) activate(e.target, false); }); }, { threshold: 0.3, rootMargin: '-80px 0px -40% 0px' }); sections.forEach(section => { observer.observe(section); section.addEventListener('click', () => activate(section, true)); section.addEventListener('mouseenter', () => { clearTimeout(hoverTimeout.current); hoverTimeout.current = setTimeout(() => activate(section, true), 80); }); }); if (sections[0]) activate(sections[0], false); }; waitForLines(); }, []); const handleCopy = e => { const btn = e.currentTarget; const codeLines = codeContentRef.current?.querySelectorAll('.code-line'); if (!codeLines || codeLines.length === 0) return; const text = Array.from(codeLines).map(line => { const clone = line.cloneNode(true); const lineNumber = clone.querySelector('.line-number'); if (lineNumber) lineNumber.remove(); return clone.textContent; }).join('\n'); if (!text) return; navigator.clipboard.writeText(text).then(() => { btn.dataset.copied = 'true'; setTimeout(() => btn.dataset.copied = 'false', 1500); }); }; return

{fileName}

{children}

; }; export const ContentSection = ({id, title, lines, children}) =>

{title &&

{title}

} {children}

; export const ContentPanel = ({children}) =>

{children}

; export const CodePreview = ({children}) => { const [instanceId] = useState(() => `preview-${Math.random().toString(36).slice(2)}`); useEffect(() => { const nav = document.querySelector('nav') || document.querySelector('header') || document.querySelector('[class*="nav"]'); if (nav) { document.documentElement.style.setProperty('--navbar-height', `${nav.offsetHeight}px`); } }, []); return

{children}

; }; export const ConnectorDetailsHeader = ({name, icon, stage, availableFeatures, unavailableFeatures = [], availableFeaturesCollate = []}) => { const showSubHeading = availableFeatures?.length > 0 || unavailableFeatures?.length > 0 || availableFeaturesCollate?.length > 0; const totalAvailableFeatures = [...availableFeatures || [], ...availableFeaturesCollate || []]; return

{icon &&

}

{name}

{stage}

{showSubHeading &&

Feature List

{totalAvailableFeatures.map(feature =>

✓ {feature}

)} {unavailableFeatures.map(feature =>

✕ {feature}

)}

}

; }; In this section, we provide guides and references to run the dbt workflow externally. * [Requirements](#requirements) * [Python Requirements](#python-requirements) * [dbt Ingestion](#dbt-ingestion) * [Run with the CLI](#2.-run-with-the-cli) ## How to Run the Connector Externally To run the Ingestion via the UI you'll need to use the OpenMetadata Ingestion Container, which comes shipped with custom Airflow plugins to handle the workflow deployment. If, instead, you want to manage your workflows externally on your preferred orchestrator, you can check the following docs to run the Ingestion Framework **anywhere**. Get more information about running the Ingestion Framework Externally ## Requirements You must have access to dbt artifacts. At minimum, the `manifest.json` file is required. The `catalog.json` and `run_results.json` files are optional but recommended for richer metadata. For dbt Cloud, create a service token with the `Account Viewer` permission and collect the account, project, and job IDs if you want to target a specific run. ## Python Requirements We have support for Python versions **3.9-3.11** To run the dbt ingestion, install: ```bash theme={null} pip3 install "openmetadata-ingestion[dbt]" ``` ## dbt Ingestion All connectors are defined as JSON Schemas. You can find the structure for dbt workflows in the OpenMetadata spec repository. ### 1. Define the YAML Config Choose one of the following dbt artifact sources: * [AWS S3 Buckets](#1.-aws-s3-buckets) * [Google Cloud Storage Buckets](#2.-google-cloud-storage-buckets) * [Azure Storage Buckets](#3.-azure-storage-buckets) * [Local Storage](#4.-local-storage) * [File Server](#5.-file-server) * [dbt Cloud](#6.-dbt-cloud) ### 1. dbt Core: AWS S3 Buckets In this configuration we fetch dbt artifacts from an S3 bucket. Configure the source type and service name for your dbt workflow. Set `dbtConfigType` to `s3` and provide AWS credentials or a region. 
AWS Access Key Credentials **awsAccessKeyId** and **awsSecretAccessKey** are used to authenticate and authorize programmatic requests to AWS services. An access key consists of: * **Access Key ID** (for example, `AKIAIOSFODNN7EXAMPLE`) * **Secret Access Key** (for example, `wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY`) Both values must be provided together when using static credentials. For more information, see [Managing access keys](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html). AWS Session Token **awsSessionToken** is required when using **temporary security credentials**, such as those obtained via AWS STS. The session token must be provided along with the access key ID and secret access key for the duration of the session. AWS Region **awsRegion** specifies the AWS Region where the target service is deployed (for example, `us-east-1`). This is the **only required parameter** when configuring an AWS connection. Other credentials can be resolved automatically using environment variables, AWS profiles, or IAM roles. Learn more in the [AWS Regions and Availability Zones documentation](https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/Concepts.RegionsAndAvailabilityZones.html). Custom Endpoint URL **endPointURL** is an optional custom endpoint used to connect to an AWS service. You may want to specify this when: * Using VPC endpoints * Connecting to local or AWS-compatible services * Overriding the default regional endpoint See [AWS service endpoints](https://docs.aws.amazon.com/general/latest/gr/rande.html) for details. AWS Profile Name **profileName** specifies the AWS CLI profile to use for authentication. Profiles store credentials and configuration in AWS config files. If not specified, the `default` profile is used. Learn more about [Named profiles for the AWS CLI](https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-profiles.html). 
Assume Role ARN **assumeRoleArn** is the Amazon Resource Name (ARN) of the IAM role to assume. This is commonly used for: * Cross-account access * Delegated permissions * Enhanced security setups This field is **required** when using Assume Role authentication. See the [AssumeRole API reference](https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRole.html). Assume Role Session Name **assumeRoleSessionName** identifies the assumed role session. This value helps uniquely identify a session when the same role is assumed multiple times or by different principals. If not provided, the default value `OpenMetadataSession` is used. Assume Role Source Identity **assumeRoleSourceIdentity** is an optional source identity passed when assuming a role. This value is recorded in AWS CloudTrail logs and can be used to trace actions performed using the assumed role. See [Source Identity in AssumeRole](https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRole.html). #### dbt Prefix Configuration **dbtPrefixConfig**: Optional config to specify the bucket name and directory path where the dbt files are stored. If config is not provided ingestion will scan all the buckets for dbt files. **dbtBucketName**: Name of the bucket where the dbt files are stored. **dbtObjectPrefix**: Path of the folder where the dbt files are stored. Follow the documentation [here](/connectors/database/dbt/setup-multiple-dbt-projects) to configure multiple dbt projects #### Source Config * **dbtUpdateDescriptions**: Configuration to update the description from dbt or not. If set to true, descriptions from dbt will override the already present descriptions on the entity. For more details visit [here](/connectors/database/dbt/ingest-dbt-descriptions). * **dbtUpdateOwners**: Configuration to update the owner from dbt or not. If set to true, owners from dbt will override the already present owners on the entity. For more details visit [here](/connectors/database/dbt/ingest-dbt-owner). 
* **includeTags**: Set to `true` or `false` to ingest tags from dbt. Default is `true`. * **dbtClassificationName**: Custom OpenMetadata Classification name for dbt tags. * **databaseFilterPattern**, **schemaFilterPattern**: Add filters to filter out models from the dbt manifest. Note that the filter supports regex as include or exclude. You can find examples [here](/connectors/ingestion/workflows/metadata/filter-patterns/database). * **tableFilterPattern**: Add a filter to filter out models from the dbt manifest. Note that the filter supports regex as include or exclude. You can find examples [here](/connectors/ingestion/workflows/metadata/filter-patterns/table). To send the metadata to OpenMetadata, it needs to be specified as `type: metadata-rest`.

The main property here is the `openMetadataServerConfig`, where you can define the host and security provider of your OpenMetadata installation.

**Logger Level** You can specify the `loggerLevel` depending on your needs. If you are trying to troubleshoot an ingestion, running with `DEBUG` will give you far more traces for identifying issues.

**JWT Token** JWT tokens allow your clients to authenticate against the OpenMetadata server. For details on enabling JWT tokens, see [here](/deployment/security/enable-jwt-tokens). If you run into issues with your JWT configuration, refer to the [JWT Troubleshooting section](/deployment/security/jwt-troubleshooting).

**Store Service Connection** If set to `true` (default), we will store the sensitive information either encrypted via the Fernet Key in the database or externally, if you have configured any [Secrets Manager](/deployment/secrets-manager). If set to `false`, the service will be created, but the service connection information will only be used by the Ingestion Framework at runtime, and won't be sent to the OpenMetadata server.

**SSL Configuration** If you have added SSL to the [OpenMetadata server](/deployment/security/enable-ssl), then you will need to handle the certificates when running the ingestion too. You can either set `verifySSL` to `ignore`, or have it as `validate`, which will require you to set the `sslConfig.caCertificate` with a local path where your ingestion runs that points to the server certificate file. Find more information on how to troubleshoot SSL issues [here](/deployment/security/enable-ssl/ssl-troubleshooting).

**ingestionPipelineFQN** Fully qualified name of the ingestion pipeline, used to identify the current ingestion pipeline.

```yaml theme={null} source: type: dbt serviceName: service_name sourceConfig: config: type: DBT dbtConfigSource: dbtConfigType: s3 dbtSecurityConfig: awsConfig: awsRegion: us-east-2 ``` ```yaml theme={null} awsAccessKeyId: KEY awsSecretAccessKey: SECRET ``` ```yaml theme={null} # awsSessionToken: TOKEN ``` ```yaml theme={null} awsRegion: us-east-2 ``` ```yaml theme={null} # endPointURL: https://athena.us-east-2.amazonaws.com/custom ``` ```yaml theme={null} # profileName: profile ``` ```yaml theme={null} # assumeRoleArn: "arn:partition:service:region:account:resource" ``` ```yaml theme={null} # assumeRoleSessionName: session ``` ```yaml theme={null} # assumeRoleSourceIdentity: identity ``` ```yaml theme={null} dbtPrefixConfig: dbtBucketName: bucket_name dbtObjectPrefix: main_dir/dbt_files/ ``` ```yaml theme={null} # dbtUpdateDescriptions: true or false # dbtUpdateOwners: true or false # includeTags: true or false # dbtClassificationName: dbtTags # databaseFilterPattern: # includes: # - .*db.* # excludes: # - .*demo.* # schemaFilterPattern: # includes: # - .*schema.* # excludes: # - .*demo.* # tableFilterPattern: # includes: # - .*table.* # excludes: # - .*demo.* ``` ```yaml theme={null} sink: type: metadata-rest config: {} ``` ```yaml theme={null} workflowConfig: loggerLevel: INFO # DEBUG, INFO, WARNING or ERROR openMetadataServerConfig: hostPort: "http://localhost:8585/api" authProvider: openmetadata securityConfig: jwtToken: "{bot_jwt_token}" ## Store the service Connection information storeServiceConnection: true # false ## Secrets Manager Configuration # secretsManagerProvider: aws, azure or noop # secretsManagerLoader: airflow or env ## If SSL, fill the following # verifySSL: validate # or ignore # sslConfig: # caCertificate: /local/path/to/certificate # ingestionPipelineFQN: <service name>.<pipeline name> ## e.g., "my_redshift.metadata" ``` ### 2. dbt Core: Google Cloud Storage Buckets In this configuration we fetch dbt artifacts from a GCS bucket. 
Configure the source type and service name for your dbt workflow. Set `dbtConfigType` to `gcs` and provide either a credentials file path or inline credentials. #### dbt Prefix Configuration **dbtPrefixConfig**: Optional config to specify the bucket name and directory path where the dbt files are stored. If config is not provided ingestion will scan all the buckets for dbt files. **dbtBucketName**: Name of the bucket where the dbt files are stored. **dbtObjectPrefix**: Path of the folder where the dbt files are stored. Follow the documentation [here](/connectors/database/dbt/setup-multiple-dbt-projects) to configure multiple dbt projects #### Source Config * **dbtUpdateDescriptions**: Configuration to update the description from dbt or not. If set to true, descriptions from dbt will override the already present descriptions on the entity. For more details visit [here](/connectors/database/dbt/ingest-dbt-descriptions). * **dbtUpdateOwners**: Configuration to update the owner from dbt or not. If set to true, owners from dbt will override the already present owners on the entity. For more details visit [here](/connectors/database/dbt/ingest-dbt-owner). * **includeTags**: Set to `true` or `false` to ingest tags from dbt. Default is `true`. * **dbtClassificationName**: Custom OpenMetadata Classification name for dbt tags. * **databaseFilterPattern**, **schemaFilterPattern**: Add filters to filter out models from the dbt manifest. Note that the filter supports regex as include or exclude. You can find examples [here](/connectors/ingestion/workflows/metadata/filter-patterns/database). * **tableFilterPattern**: Add a filter to filter out models from the dbt manifest. Note that the filter supports regex as include or exclude. You can find examples [here](/connectors/ingestion/workflows/metadata/filter-patterns/table). To send the metadata to OpenMetadata, it needs to be specified as `type: metadata-rest`.

The main property here is the `openMetadataServerConfig`, where you can define the host and security provider of your OpenMetadata installation.

**Logger Level** You can specify the `loggerLevel` depending on your needs. If you are trying to troubleshoot an ingestion, running with `DEBUG` will give you far more traces for identifying issues.

**ingestionPipelineFQN** Fully qualified name of the ingestion pipeline, used to identify the current ingestion pipeline.

```yaml theme={null} source: type: dbt serviceName: service_name sourceConfig: config: type: DBT dbtConfigSource: dbtConfigType: gcs dbtSecurityConfig: gcpConfig: gcpCredentialsPath: /path/to/service-account.json ``` ```yaml theme={null} dbtPrefixConfig: dbtBucketName: bucket_name dbtObjectPrefix: main_dir/dbt_files/ ``` ```yaml theme={null} # dbtUpdateDescriptions: true or false # dbtUpdateOwners: true or false # includeTags: true or false # dbtClassificationName: dbtTags # databaseFilterPattern: # includes: # - .*db.* # excludes: # - .*demo.* # schemaFilterPattern: # includes: # - .*schema.* # excludes: # - .*demo.* # tableFilterPattern: # includes: # - .*table.* # excludes: # - .*demo.* ``` ```yaml theme={null} sink: type: metadata-rest config: {} ``` ```yaml theme={null} workflowConfig: loggerLevel: INFO # DEBUG, INFO, WARNING or ERROR openMetadataServerConfig: hostPort: "http://localhost:8585/api" authProvider: openmetadata securityConfig: jwtToken: "{bot_jwt_token}" ## Store the service Connection information storeServiceConnection: true # false ## Secrets Manager Configuration # secretsManagerProvider: aws, azure or noop # secretsManagerLoader: airflow or env ## If SSL, fill the following # verifySSL: validate # or ignore # sslConfig: # caCertificate: /local/path/to/certificate # ingestionPipelineFQN: <service name>.<pipeline name> ## e.g., "my_redshift.metadata" ``` ### 3. dbt Core: Azure Storage Buckets In this configuration we fetch dbt artifacts from Azure Storage. Configure the source type and service name for your dbt workflow. Set `dbtConfigType` to `azure` and provide Azure AD credentials. #### dbt Prefix Configuration **dbtPrefixConfig**: Optional config to specify the bucket name and directory path where the dbt files are stored. If config is not provided ingestion will scan all the buckets for dbt files. **dbtBucketName**: Name of the bucket where the dbt files are stored. **dbtObjectPrefix**: Path of the folder where the dbt files are stored. 
Follow the documentation [here](/connectors/database/dbt/setup-multiple-dbt-projects) to configure multiple dbt projects #### Source Config * **dbtUpdateDescriptions**: Configuration to update the description from dbt or not. If set to true, descriptions from dbt will override the already present descriptions on the entity. For more details visit [here](/connectors/database/dbt/ingest-dbt-descriptions). * **dbtUpdateOwners**: Configuration to update the owner from dbt or not. If set to true, owners from dbt will override the already present owners on the entity. For more details visit [here](/connectors/database/dbt/ingest-dbt-owner). * **includeTags**: Set to `true` or `false` to ingest tags from dbt. Default is `true`. * **dbtClassificationName**: Custom OpenMetadata Classification name for dbt tags. * **databaseFilterPattern**, **schemaFilterPattern**: Add filters to filter out models from the dbt manifest. Note that the filter supports regex as include or exclude. You can find examples [here](/connectors/ingestion/workflows/metadata/filter-patterns/database). * **tableFilterPattern**: Add a filter to filter out models from the dbt manifest. Note that the filter supports regex as include or exclude. You can find examples [here](/connectors/ingestion/workflows/metadata/filter-patterns/table). To send the metadata to OpenMetadata, it needs to be specified as `type: metadata-rest`.

The main property here is the `openMetadataServerConfig`, where you can define the host and security provider of your OpenMetadata installation.

**Logger Level** You can specify the `loggerLevel` depending on your needs. If you are trying to troubleshoot an ingestion, running with `DEBUG` will give you far more traces for identifying issues.

**ingestionPipelineFQN** Fully qualified name of the ingestion pipeline, used to identify the current ingestion pipeline.

```yaml theme={null} source: type: dbt serviceName: service_name sourceConfig: config: type: DBT dbtConfigSource: dbtConfigType: azure dbtSecurityConfig: clientId: client-id clientSecret: client-secret tenantId: tenant-id accountName: account-name ``` ```yaml theme={null} dbtPrefixConfig: dbtBucketName: bucket_name dbtObjectPrefix: main_dir/dbt_files/ ``` ```yaml theme={null} # dbtUpdateDescriptions: true or false # dbtUpdateOwners: true or false # includeTags: true or false # dbtClassificationName: dbtTags # databaseFilterPattern: # includes: # - .*db.* # excludes: # - .*demo.* # schemaFilterPattern: # includes: # - .*schema.* # excludes: # - .*demo.* # tableFilterPattern: # includes: # - .*table.* # excludes: # - .*demo.* ``` ```yaml theme={null} sink: type: metadata-rest config: {} ``` ```yaml theme={null} workflowConfig: loggerLevel: INFO # DEBUG, INFO, WARNING or ERROR openMetadataServerConfig: hostPort: "http://localhost:8585/api" authProvider: openmetadata securityConfig: jwtToken: "{bot_jwt_token}" ## Store the service Connection information storeServiceConnection: true # false ## Secrets Manager Configuration # secretsManagerProvider: aws, azure or noop # secretsManagerLoader: airflow or env ## If SSL, fill the following # verifySSL: validate # or ignore # sslConfig: # caCertificate: /local/path/to/certificate # ingestionPipelineFQN: <service name>.<pipeline name> ## e.g., "my_redshift.metadata" ``` ### 4. dbt Core: Local Storage In this configuration we fetch dbt artifacts from the machine running the ingestion. Configure the source type and service name for your dbt workflow. Set `dbtConfigType` to `local` and provide file paths for dbt artifacts. #### Source Config * **dbtUpdateDescriptions**: Configuration to update the description from dbt or not. If set to true, descriptions from dbt will override the already present descriptions on the entity. For more details visit [here](/connectors/database/dbt/ingest-dbt-descriptions). 
* **dbtUpdateOwners**: Configuration to update the owner from dbt or not. If set to true, owners from dbt will override the already present owners on the entity. For more details visit [here](/connectors/database/dbt/ingest-dbt-owner). * **includeTags**: Set to `true` or `false` to ingest tags from dbt. Default is `true`. * **dbtClassificationName**: Custom OpenMetadata Classification name for dbt tags. * **databaseFilterPattern**, **schemaFilterPattern**: Add filters to filter out models from the dbt manifest. Note that the filter supports regex as include or exclude. You can find examples [here](/connectors/ingestion/workflows/metadata/filter-patterns/database). * **tableFilterPattern**: Add a filter to filter out models from the dbt manifest. Note that the filter supports regex as include or exclude. You can find examples [here](/connectors/ingestion/workflows/metadata/filter-patterns/table). To send the metadata to OpenMetadata, it needs to be specified as `type: metadata-rest`.

The main property here is the `openMetadataServerConfig`, where you can define the host and security provider of your OpenMetadata installation.

**Logger Level** You can specify the `loggerLevel` depending on your needs. If you are trying to troubleshoot an ingestion, running with `DEBUG` will give you far more traces for identifying issues.

**ingestionPipelineFQN** Fully qualified name of the ingestion pipeline, used to identify the current ingestion pipeline.

```yaml theme={null} source: type: dbt serviceName: service_name sourceConfig: config: type: DBT dbtConfigSource: dbtConfigType: local dbtCatalogFilePath: /path/to/catalog.json dbtManifestFilePath: /path/to/manifest.json dbtRunResultsFilePath: /path/to/run_results.json ``` ```yaml theme={null} # dbtUpdateDescriptions: true or false # dbtUpdateOwners: true or false # includeTags: true or false # dbtClassificationName: dbtTags # databaseFilterPattern: # includes: # - .*db.* # excludes: # - .*demo.* # schemaFilterPattern: # includes: # - .*schema.* # excludes: # - .*demo.* # tableFilterPattern: # includes: # - .*table.* # excludes: # - .*demo.* ``` ```yaml theme={null} sink: type: metadata-rest config: {} ``` ```yaml theme={null} workflowConfig: loggerLevel: INFO # DEBUG, INFO, WARNING or ERROR openMetadataServerConfig: hostPort: "http://localhost:8585/api" authProvider: openmetadata securityConfig: jwtToken: "{bot_jwt_token}" ## Store the service Connection information storeServiceConnection: true # false ## Secrets Manager Configuration # secretsManagerProvider: aws, azure or noop # secretsManagerLoader: airflow or env ## If SSL, fill the following # verifySSL: validate # or ignore # sslConfig: # caCertificate: /local/path/to/certificate # ingestionPipelineFQN: <service name>.<pipeline name> ## e.g., "my_redshift.metadata" ``` ### 5. dbt Core: File Server In this configuration we fetch dbt artifacts from an HTTP or file server. Configure the source type and service name for your dbt workflow. Set `dbtConfigType` to `http` and provide HTTP URLs for dbt artifacts. #### Source Config * **dbtUpdateDescriptions**: Configuration to update the description from dbt or not. If set to true, descriptions from dbt will override the already present descriptions on the entity. For more details visit [here](/connectors/database/dbt/ingest-dbt-descriptions). * **dbtUpdateOwners**: Configuration to update the owner from dbt or not. 
If set to true, owners from dbt will override the already present owners on the entity. For more details visit [here](/connectors/database/dbt/ingest-dbt-owner). * **includeTags**: Set to `true` or `false` to ingest tags from dbt. Default is `true`. * **dbtClassificationName**: Custom OpenMetadata Classification name for dbt tags. * **databaseFilterPattern**, **schemaFilterPattern**: Add filters to filter out models from the dbt manifest. Note that the filter supports regex as include or exclude. You can find examples [here](/connectors/ingestion/workflows/metadata/filter-patterns/database). * **tableFilterPattern**: Add a filter to filter out models from the dbt manifest. Note that the filter supports regex as include or exclude. You can find examples [here](/connectors/ingestion/workflows/metadata/filter-patterns/table). To send the metadata to OpenMetadata, it needs to be specified as `type: metadata-rest`.

The main property here is the `openMetadataServerConfig`, where you can define the host and security provider of your OpenMetadata installation.

**Logger Level** You can specify the `loggerLevel` depending on your needs. If you are trying to troubleshoot an ingestion, running with `DEBUG` will give you far more traces for identifying issues.

**ingestionPipelineFQN** Fully qualified name of the ingestion pipeline, used to identify the current ingestion pipeline.

```yaml theme={null} source: type: dbt serviceName: service_name sourceConfig: config: type: DBT dbtConfigSource: dbtConfigType: http dbtCatalogHttpPath: https://example.com/catalog.json dbtManifestHttpPath: https://example.com/manifest.json dbtRunResultsHttpPath: https://example.com/run_results.json ``` ```yaml theme={null} # dbtUpdateDescriptions: true or false # dbtUpdateOwners: true or false # includeTags: true or false # dbtClassificationName: dbtTags # databaseFilterPattern: # includes: # - .*db.* # excludes: # - .*demo.* # schemaFilterPattern: # includes: # - .*schema.* # excludes: # - .*demo.* # tableFilterPattern: # includes: # - .*table.* # excludes: # - .*demo.* ``` ```yaml theme={null} sink: type: metadata-rest config: {} ``` ```yaml theme={null} workflowConfig: loggerLevel: INFO # DEBUG, INFO, WARNING or ERROR openMetadataServerConfig: hostPort: "http://localhost:8585/api" authProvider: openmetadata securityConfig: jwtToken: "{bot_jwt_token}" ## Store the service Connection information storeServiceConnection: true # false ## Secrets Manager Configuration # secretsManagerProvider: aws, azure or noop # secretsManagerLoader: airflow or env ## If SSL, fill the following # verifySSL: validate # or ignore # sslConfig: # caCertificate: /local/path/to/certificate # ingestionPipelineFQN: <service name>.<pipeline name> ## e.g., "my_redshift.metadata" ``` ### 6. dbt Cloud: API-Based Ingestion In this configuration we fetch dbt artifacts from dbt Cloud APIs. The dbt Cloud workflow uses the dbt Cloud v2 APIs to retrieve the latest successful run and download artifacts. Configure the source type and service name for your dbt workflow. Set `dbtConfigType` to `cloud` and provide your dbt Cloud credentials and identifiers. #### Source Config * **dbtUpdateDescriptions**: Configuration to update the description from dbt or not. If set to true, descriptions from dbt will override the already present descriptions on the entity. 
For more details visit [here](/connectors/database/dbt/ingest-dbt-descriptions). * **dbtUpdateOwners**: Configuration to update the owner from dbt or not. If set to true, owners from dbt will override the already present owners on the entity. For more details visit [here](/connectors/database/dbt/ingest-dbt-owner). * **includeTags**: Set to `true` or `false` to ingest tags from dbt. Default is `true`. * **dbtClassificationName**: Custom OpenMetadata Classification name for dbt tags. * **databaseFilterPattern**, **schemaFilterPattern**: Add filters to filter out models from the dbt manifest. Note that the filter supports regex as include or exclude. You can find examples [here](/connectors/ingestion/workflows/metadata/filter-patterns/database). * **tableFilterPattern**: Add a filter to filter out models from the dbt manifest. Note that the filter supports regex as include or exclude. You can find examples [here](/connectors/ingestion/workflows/metadata/filter-patterns/table). To send the metadata to OpenMetadata, it needs to be specified as `type: metadata-rest`.

The main property here is the `openMetadataServerConfig`, where you can define the host and security provider of your OpenMetadata installation.

**Logger Level** You can specify the `loggerLevel` depending on your needs. If you are trying to troubleshoot an ingestion, running with `DEBUG` will give you far more traces for identifying issues.

**ingestionPipelineFQN** Fully qualified name of the ingestion pipeline, used to identify the current ingestion pipeline.

```yaml theme={null} source: type: dbt serviceName: service_name sourceConfig: config: type: DBT dbtConfigSource: dbtConfigType: cloud dbtCloudAuthToken: AUTH_TOKEN dbtCloudAccountId: ACCOUNT_ID dbtCloudProjectId: PROJECT_ID dbtCloudJobId: JOB_ID dbtCloudUrl: https://cloud.getdbt.com ``` ```yaml theme={null} # dbtUpdateDescriptions: true or false # dbtUpdateOwners: true or false # includeTags: true or false # dbtClassificationName: dbtTags # databaseFilterPattern: # includes: # - .*db.* # excludes: # - .*demo.* # schemaFilterPattern: # includes: # - .*schema.* # excludes: # - .*demo.* # tableFilterPattern: # includes: # - .*table.* # excludes: # - .*demo.* ``` ```yaml theme={null} sink: type: metadata-rest config: {} ``` ```yaml theme={null} workflowConfig: loggerLevel: INFO # DEBUG, INFO, WARNING or ERROR openMetadataServerConfig: hostPort: "http://localhost:8585/api" authProvider: openmetadata securityConfig: jwtToken: "{bot_jwt_token}" ## Store the service Connection information storeServiceConnection: true # false ## Secrets Manager Configuration # secretsManagerProvider: aws, azure or noop # secretsManagerLoader: airflow or env ## If SSL, fill the following # verifySSL: validate # or ignore # sslConfig: # caCertificate: /local/path/to/certificate # ingestionPipelineFQN: <service name>.<pipeline name> ## e.g., "my_redshift.metadata" ``` ### 2. Run with the CLI First, we will need to save the YAML file. Afterward, and with all requirements installed, we can run: ```bash theme={null} metadata ingest -c <path-to-yaml> ``` Note that from connector to connector, this recipe will always be the same. By updating the YAML configuration, you will be able to extract metadata from different sources.