共计 1879 个字符,预计需要花费 5 分钟才能阅读完成。
# Pulls lineage data from tableau server to understand tableau's servers
from tableau_api_lib import TableauServerConnection
import pandas as pd
import psycopg2
# Tableau Server credentials
tableau_server_config = {
'tableau_prod': {
'server': 'http://test',
'api_version': '3.10',
'username': 'tab',
'password': '123456',
'site_name': 'Site',
'site_url': 'Site'
}
}
conn = TableauServerConnection(tableau_server_config, 'tableau_prod')
# connect and query graphQL data from tableau server
def pull_graphql_data(graphql):
conn.sign_in()
connection = conn.metadata_graphql_query(query=graphql)
return connection
def pull_postgres_data():
"""# Tableau postgres db credentials"""
tableau_postgres_config = psycopg2.connect(
host="tableau",
database="workgroup",
user="readonly",
password="password",
port="8080")
sql = """
SELECT DISTINCT background_jobs.title,
P.name AS ProjectName,
schedules.name
FROM background_jobs
LEFT JOIN tasks AS task ON background_jobs.correlation_id = task.id
LEFT JOIN flow_runs as flow_t ON task.id = flow_t.task_id
LEFT JOIN flows as flow ON flow_t.flow_id = flow.id
LEFT JOIN hist_projects AS P ON P.project_id = flow.project_id
LEFT JOIN _schedules AS schedules ON task.schedule_id = schedules.id
WHERE CAST(background_jobs.created_at AS DATE) >= current_date - 1
"""
data = pd.read_sql(sql, tableau_postgres_config)
return data
def merge_data():
graphql_query_flows = """
{
flows {
name
projectName
owner{name}
downstreamWorkbooks {name}
}
}
"""
# Convert graphQL data into json
flow_json = pull_graphql_data(graphql_query_flows).json()
# Import json data into dataframe
flow_normalized_data = pd.json_normalize(flow_json['data']['flows']
, meta=['name', 'projectName', ['owner', 'name']]
, meta_prefix='flows.'
, record_prefix='downstreamWorkbooks.'
)
# Merge graphql data with postgres data to bring through schedule assignment by flow name
full_data = flow_normalized_data.merge(pull_postgres_data(), left_on=['name','projectName'], right_on=['title', 'projectname'])
# Write to excel to view data
full_data.to_excel("flows.xlsx")
conn.sign_out()
if __name__ == "__main__":
正文完