Extracting Database Schema Details in Datahub via GraphQL

Original Slack Thread

Hello, I am planning to get the column names and data types of a table in datahub thru GraphQL. Do you guys know which function or query I should use? Thanks.

you can use this GQL query

  dataset(urn: "urn:li:dataset:(urn:li:dataPlatform:redshift,warehouse.sst.v_zt_payment,PROD)") {
    ...datasetSchema
    siblings {
      isPrimary
      siblings {
        urn
        type
        ... on Dataset {
          ...datasetSchema
          __typename
        }
        __typename
      }
      __typename
    }
    __typename
  }
}

fragment datasetSchema on Dataset {
  schemaMetadata(version: 0) {
    ...schemaMetadataFields
    __typename
  }
  editableSchemaMetadata {
    editableSchemaFieldInfo {
      fieldPath
      description
    }
  }
}

fragment schemaMetadataFields on SchemaMetadata {
  aspectVersion
  createdAt
  datasetUrn
  name
  platformUrn
  version
  cluster
  hash
  platformSchema {
    ... on TableSchema {
      schema
      __typename
    }
    ... on KeyValueSchema {
      keySchema
      valueSchema
      __typename
    }
    __typename
  }
  fields {
    ...schemaFieldFields
    __typename
  }
  primaryKeys
  __typename
}

fragment schemaFieldFields on SchemaField {
  fieldPath
  label
  jsonPath
  nullable
  description
  type
  nativeDataType
  recursive
  isPartOfKey
}```

Thank you! I am seeing that this is not part of the GQL documentation. Right? Since I want to extract all databases, schemas, and tables without the need to pass a URN. Is that possible?

this query will help you get schema details of datasets on a platform or list platforms

  search(input: {type: DATASET, 
   query: "*", start: 0, count: 50
      orFilters: [
      {
        and: [
        {
              field: "platform"
              values: ["redshift"]
              condition: CONTAIN
          }
        ]
      }
    ]
  }
  ) {
    start
    count
    total
    searchResults {
      entity {
        urn
        ... on Dataset {
            ...datasetSchema
    
        }
      }
    }
  }
}

fragment datasetSchema on Dataset {
  schemaMetadata(version: 0) {
    ...schemaMetadataFields
    __typename
  }
  editableSchemaMetadata {
    editableSchemaFieldInfo {
      fieldPath
      description
    }
  }
}

fragment schemaMetadataFields on SchemaMetadata {
  aspectVersion
  createdAt
  datasetUrn
  name
  platformUrn
  version
  cluster
  hash
  platformSchema {
    ... on TableSchema {
      schema
      __typename
    }
    ... on KeyValueSchema {
      keySchema
      valueSchema
      __typename
    }
    __typename
  }
  fields {
    ...schemaFieldFields
    __typename
  }
  primaryKeys
  __typename
}

fragment schemaFieldFields on SchemaField {
  fieldPath
  label
  jsonPath
  nullable
  description
  type
  nativeDataType
  recursive
  isPartOfKey
}```

Thanks a lot! Really big help!