Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Restyle Example Script: Get table names used by data source #12

Merged
merged 4 commits into from
Apr 16, 2020
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 42 additions & 29 deletions redash_toolbelt/examples/find-table-names.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
import itertools, json, re
import itertools
import json
import re

import click
import pytest

from redash_toolbelt import Redash


Expand All @@ -10,21 +14,18 @@ def find_table_names(url, key, data_source_id):

schema_tables = [
token.get("name")
for token in client._get(f"api/data_sources/{data_source_id}/schema")
.json()
.get("schema", [])
for token in client._get(f"api/data_sources/{data_source_id}/schema").
json().get("schema", [])
]

queries = [
query
for query in client.paginate(client.queries)
query for query in client.paginate(client.queries)
if query.get("data_source_id", None) == int(data_source_id)
]

tables_by_qry = {
query["id"]: [
table
for table in extract_table_names(query["query"])
table for table in extract_table_names(query["query"])
if table in schema_tables or len(schema_tables) == 0
]
for query in queries
Expand All @@ -45,11 +46,12 @@ def format_query(str_sql):

def extract_table_names(str_sql):

PATTERN = re.compile(
r"(?:FROM|JOIN)(?:\s+)([^\s\(\)]+)", flags=re.IGNORECASE | re.UNICODE
)
PATTERN = re.compile(r"(?:FROM|JOIN)(?:\s+)([^\s\(\)]+)",
flags=re.IGNORECASE | re.UNICODE)

regex_matches = [match for match in re.findall(PATTERN, format_query(str_sql))]
regex_matches = [
match for match in re.findall(PATTERN, format_query(str_sql))
]

# For test_6: expand any comma-delimited matches
split_matches = [i.split(",") for i in regex_matches]
Expand All @@ -61,9 +63,8 @@ def print_summary(tables_by_qry):
"""Builds a summary showing table names and count of queries that reference them."""

summary = {
table_name: sum(
[1 for tables in tables_by_qry.values() if table_name in tables]
)
table_name:
sum([1 for tables in tables_by_qry.values() if table_name in tables])
for table_name in itertools.chain(*tables_by_qry.values())
}

Expand All @@ -73,7 +74,9 @@ def print_summary(tables_by_qry):
print(f"{'table':>{align}} | {'number of queries':>17}")
print("-" * align + " | " + "-" * 17)

for t, num in sorted(summary.items(), key=lambda item: item[1], reverse=True):
for t, num in sorted(summary.items(),
key=lambda item: item[1],
reverse=True):
print(f"{t:>{align}} | {num:>17}")

print("\n")
Expand All @@ -82,19 +85,22 @@ def print_summary(tables_by_qry):
def print_details(tables_by_qry):
"""Prints out (query_id, tablename) tuples"""

details = [
[(query, table) for table in tables] for query, tables in tables_by_qry.items()
]
details = [[(query, table) for table in tables]
for query, tables in tables_by_qry.items()]

for row in itertools.chain(*details):
print(",".join([str(i) for i in row]))


@click.command()
@click.argument("url",)
@click.argument("key",)
@click.argument(
"url", )
@click.argument(
"key", )
@click.argument("data_source_id")
@click.option("--detail", is_flag=True, help="Prints out all table/query pairs?")
@click.option("--detail",
is_flag=True,
help="Prints out all table/query pairs?")
def main(url, key, data_source_id, detail):
"""Find table names referenced in queries against DATA_SOURCE_ID"""

Expand All @@ -119,7 +125,8 @@ def test_1():
tables = extract_table_names(sql)
expected = ["table0", "table1"]

assert len(tables) == len(expected) and all([i in expected for i in tables])
assert len(tables) == len(expected) and all(
[i in expected for i in tables])


def test_2():
Expand All @@ -131,7 +138,8 @@ def test_2():
tables = extract_table_names(sql)
expected = ["table0", "table1"]

assert len(tables) == len(expected) and all([i in expected for i in tables])
assert len(tables) == len(expected) and all(
[i in expected for i in tables])


def test_3():
Expand All @@ -143,7 +151,8 @@ def test_3():
tables = extract_table_names(sql)
expected = ["table0", "table1"]

assert len(tables) == len(expected) and all([i in expected for i in tables])
assert len(tables) == len(expected) and all(
[i in expected for i in tables])


def test_4():
Expand All @@ -155,7 +164,8 @@ def test_4():
tables = extract_table_names(sql)
expected = ["schema.table0", "schema.table1"]

assert len(tables) == len(expected) and all([i in expected for i in tables])
assert len(tables) == len(expected) and all(
[i in expected for i in tables])


def test_5():
Expand All @@ -171,7 +181,8 @@ def test_5():
tables = extract_table_names(sql)
expected = ["table0", "table1"]

assert len(tables) == len(expected) and all([i in expected for i in tables])
assert len(tables) == len(expected) and all(
[i in expected for i in tables])


def test_6():
Expand All @@ -185,7 +196,8 @@ def test_6():
tables = extract_table_names(sql)
expected = ["table1", "table2", "table3", "table4", "table5", "table6"]

assert len(tables) == len(expected) and all([i in expected for i in tables])
assert len(tables) == len(expected) and all(
[i in expected for i in tables])


def test_7():
Expand All @@ -197,4 +209,5 @@ def test_7():
tables = extract_table_names(sql)
expected = ["[table0]", "[table1]"]

assert len(tables) == len(expected) and all([i in expected for i in tables])
assert len(tables) == len(expected) and all(
[i in expected for i in tables])