From d044dcc1aa057b38ede748217268aee46a74dad4 Mon Sep 17 00:00:00 2001 From: mathiasg Date: Wed, 13 Dec 2023 16:32:19 -0500 Subject: [PATCH 1/7] ENH: Improve project usage querying Reduces # of queries made to the database, with better data-munging. --- migas/server/database.py | 123 +++++++++++++++++++++++++-------------- 1 file changed, 79 insertions(+), 44 deletions(-) diff --git a/migas/server/database.py b/migas/server/database.py index bfaa3e3..447cfae 100644 --- a/migas/server/database.py +++ b/migas/server/database.py @@ -1,7 +1,7 @@ -from typing import List +import typing as ty # from asyncpg import Record -from sqlalchemy import distinct, func, select, text +from sqlalchemy import distinct, func, select, case, desc from sqlalchemy.dialects.postgresql import insert from .models import Table, gen_session, get_project_tables, projects @@ -144,7 +144,7 @@ async def query_usage_unique(project: Table) -> int: return res.scalars().one() -async def query_projects() -> List[str]: +async def query_projects() -> list[str]: async with gen_session() as session: res = await session.execute(select(projects.c.project)) return res.scalars().all() @@ -156,53 +156,88 @@ async def project_exists(project: str) -> bool: return bool(res.one_or_none()) -async def get_viz_data(project: str) -> list: +async def get_viz_data( + project_name: str, + version: str | None = None, + date_group: ty.Literal['week', 'month', 'year'] = 'month' +) -> list: """ - TODO: Implement bucket sorting. 
- - Implements the following SQL pseudocode: - - select distinct version from where version not like '%+%'; - - for vers in ^: - - select count(distinct session_id) from where is_ci = false and version = ver; - - select count(distinct session_id) from where is_ci = false and version = ver and status = 'C'; - - select count(distinct user_id) from where is_ci = false and version = ver; - - select count(*), date_part('isoyear', timestamp) as year, date_part('week', timestamp) as week from where status = 'C' group by year, week order by year, week; + Filter project usage into groups, based on versions and dates. """ - p, _ = await get_project_tables(project) + project, _ = await get_project_tables(project_name) - async with gen_session() as session: - # we want to return a table with: - # version | total_runs (unique session_id) | sucessful_runs | users (unique user_id) - # TODO: index should be applied to version, session_id, user_id columns - # TODO: this should be done within a single query - - # first grab all different versions - versions = await session.execute( - select(distinct(p.c.version)).where(p.c.version.not_like('%+%')) + match date_group: + case 'week': + datefmt = 'YYYY-WW' + case 'month': + datefmt = 'YYYY-MM' + case 'year': + datefmt = 'YYYY' + case _: + raise NotImplementedError + + # Create a subquery to: + # - filter out version(s) + # - convert timestamps into YEAR-WEEK values + subq0 = ( + select( + project.c.version, + project.c.session_id, + func.to_char(project.c.timestamp, datefmt).label('date'), + project.c.status ) - data = {v: {} for v in versions.scalars().all()} + .distinct(project.c.session_id) + .where(project.c.status != None) + ) - for vers in data.keys(): - total = await session.execute( - select(func.count(distinct(p.c.session_id))) - .where(p.c.is_ci == False) - .where(p.c.version == vers) - ) - data[vers]['total_runs'] = total.scalar() - success = await session.execute( - select(func.count(distinct(p.c.session_id))) - 
.where(p.c.is_ci == False) - .where(p.c.version == vers) - .where(text("status='C'")) - ) - data[vers]['successful_runs'] = success.scalar() - uusers = await session.execute( - select(func.count(distinct(p.c.user_id))) - .where(p.c.is_ci == False) - .where(p.c.version == vers) + if version: + subq0 = ( + subq0.where(project.c.version == version) + ) + else: + # Filter out "unofficial" versions + subq0 = ( + subq0 + .where(project.c.version.not_like('%+%')) + .where(project.c.version.not_like('%rc%')) + ) + subq0 = subq0.subquery() + + + subq1 = ( + select( + subq0.c.version, + subq0.c.date, + subq0.c.status, + func.count().label("status_sum") + ) + .group_by(subq0.c.status, subq0.c.date, subq0.c.version) + # .order_by(subq.c.date.desc(), subq.c.version.desc()) + .subquery() + ) + + complete = case((subq1.c.status == 'C', subq1.c.status_sum), else_=0) + failed = case((subq1.c.status == 'F', subq1.c.status_sum), else_=0) + suspended = case((subq1.c.status == 'S', subq1.c.status_sum), else_=0) + incomplete = case((subq1.c.status == 'R', subq1.c.status_sum), else_=0) + + async with gen_session() as session: + + # Group subquery into groups composed of: + # + date = await session.execute( + select( + subq1.c.version, + subq1.c.date, + func.max(complete).label('complete'), + func.max(failed).label('failed'), + func.max(suspended).label('suspended'), + func.max(incomplete).label('incomplete') ) - data[vers]['unique_users'] = uusers.scalar() - return data + .group_by(subq1.c.version, subq1.c.date) + .order_by(subq1.c.version.desc(), subq1.c.date.desc()) + ) + return date.all() async def verify_token(token: str) -> tuple[bool, list[str]]: From 55daef43fedfcda66ffbfa023d0a101409d0094e Mon Sep 17 00:00:00 2001 From: mathiasg Date: Wed, 13 Dec 2023 16:34:39 -0500 Subject: [PATCH 2/7] FIX: Remove burdensome relationship between project/users tables --- migas/server/models.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git 
a/migas/server/models.py b/migas/server/models.py index d543b18..d0dc884 100644 --- a/migas/server/models.py +++ b/migas/server/models.py @@ -59,7 +59,7 @@ class Authentication(Base): async def get_project_tables( project: str, create: bool = True -) -> tuple[Table | None, Table | None]: +) -> tuple[Table, Table]: """ Return `Project` and `Users` tables pertaining to input `project`. @@ -96,9 +96,9 @@ async def get_project_tables( '__tablename__': users_tablename, }, ) - # assign relationships once both are defined - ProjectModel.users = relationship(users_class_name, back_populates='project') - UsersModel.project = relationship(project_class_name, back_populates='users') + # # assign relationships once both are defined + # ProjectModel.users = relationship(users_class_name, back_populates='project') + # UsersModel.project = relationship(project_class_name, back_populates='users') users_table = tables[users_fullname] project_table = tables[project_fullname] @@ -108,17 +108,19 @@ async def get_project_tables( raise RuntimeError(f'Missing required table for {project}') if tables_to_create: - from .connections import get_db_engine + await create_tables(tables_to_create) - engine = await get_db_engine() + return project_table, users_table - def _create_tables(conn) -> None: - return Base.metadata.create_all(conn, tables=tables_to_create) - async with engine.begin() as conn: - await conn.run_sync(_create_tables) +async def create_tables(tables: list) -> None: + from .connections import get_db_engine - return project_table, users_table + engine = await get_db_engine() + async with engine.begin() as conn: + def _create_tables(conn) -> None: + return Base.metadata.create_all(conn, tables=tables) + await conn.run_sync(_create_tables) async def populate_base(conn: AsyncConnection) -> None: From a0cc38ce27e2f82b2969dfed12cb874d2b2afcfb Mon Sep 17 00:00:00 2001 From: mathiasg Date: Wed, 13 Dec 2023 16:36:07 -0500 Subject: [PATCH 3/7] REWORK: Richen data available from 
`usage_stats()` --- migas/server/schema.py | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/migas/server/schema.py b/migas/server/schema.py index 6ce2d4d..55547bf 100644 --- a/migas/server/schema.py +++ b/migas/server/schema.py @@ -1,4 +1,6 @@ +from collections import defaultdict import os +import typing as ty import strawberry from fastapi import Request, Response @@ -107,12 +109,33 @@ async def login(self, token: str) -> AuthenticationResult: ) @strawberry.field - async def usage_stats(self, project: str, token: str) -> JSON: + async def usage_stats( + self, + project: str, + token: str, + version: str | None = None, + date_group: str = 'month', # TODO: Literal incompatibility with strawberry - enum? + ) -> JSON: 'Generate different usage information' _, projects = await verify_token(token) if project not in projects: raise Exception('Invalid token.') - return await get_viz_data(project) + usage = await get_viz_data(project, version, date_group) + + data = {} + for ver, date, comp, fail, susp, inc in usage: + if ver not in data: + data[ver] = {} + data[ver]['date_grouping'] = date_group + for f in ('dates', 'completed', 'failed', 'suspended', 'incomplete'): + data[ver][f] = [] + + data[ver]['dates'].append(date) + data[ver]['completed'].append(comp) + data[ver]['failed'].append(fail) + data[ver]['suspended'].append(susp) + data[ver]['incomplete'].append(inc) + return data @strawberry.type From 5fb57f0707e5b59cade66e0b2b5528bf306334dd Mon Sep 17 00:00:00 2001 From: mathiasg Date: Tue, 9 Jan 2024 18:00:00 -0500 Subject: [PATCH 4/7] ENH: Add day format support to viz data query --- migas/server/database.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/migas/server/database.py b/migas/server/database.py index 447cfae..6129fbf 100644 --- a/migas/server/database.py +++ b/migas/server/database.py @@ -159,7 +159,7 @@ async def project_exists(project: str) -> bool: async def get_viz_data( 
project_name: str, version: str | None = None, - date_group: ty.Literal['week', 'month', 'year'] = 'month' + date_group: ty.Literal['day', 'week', 'month', 'year'] = 'month' ) -> list: """ Filter project usage into groups, based on versions and dates. @@ -167,8 +167,10 @@ async def get_viz_data( project, _ = await get_project_tables(project_name) match date_group: + case 'day': + datefmt = 'YYYY-MM-DD' case 'week': - datefmt = 'YYYY-WW' + datefmt = 'YYYY-WW' #? case 'month': datefmt = 'YYYY-MM' case 'year': From 4e5b748235bcb926ae402f02359c7aa95e5c1dc7 Mon Sep 17 00:00:00 2001 From: mathiasg Date: Tue, 9 Jan 2024 18:05:04 -0500 Subject: [PATCH 5/7] RF: Rework data structure for easier plotting --- migas/server/schema.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/migas/server/schema.py b/migas/server/schema.py index 55547bf..7d36c51 100644 --- a/migas/server/schema.py +++ b/migas/server/schema.py @@ -114,7 +114,7 @@ async def usage_stats( project: str, token: str, version: str | None = None, - date_group: str = 'month', # TODO: Literal incompatibility with strawberry - enum? + date_group: str = 'day', # TODO: ty.Literal incompatibility with strawberry - enum? 
) -> JSON: 'Generate different usage information' _, projects = await verify_token(token) @@ -122,19 +122,19 @@ async def usage_stats( raise Exception('Invalid token.') usage = await get_viz_data(project, version, date_group) - data = {} + data = {'versions': [], 'grouping': date_group, 'timeseries': []} for ver, date, comp, fail, susp, inc in usage: - if ver not in data: - data[ver] = {} - data[ver]['date_grouping'] = date_group - for f in ('dates', 'completed', 'failed', 'suspended', 'incomplete'): - data[ver][f] = [] - - data[ver]['dates'].append(date) - data[ver]['completed'].append(comp) - data[ver]['failed'].append(fail) - data[ver]['suspended'].append(susp) - data[ver]['incomplete'].append(inc) + if ver not in data['versions']: + data['versions'].append(ver) + data['timeseries'].append({ + 'version': ver, + 'date': date, + 'completed': comp, + 'failed': fail, + 'suspended': susp, + 'incomplete': inc, + }) + return data From ee58734c2041430f0938809b433916fe74ca9df1 Mon Sep 17 00:00:00 2001 From: mathiasg Date: Tue, 9 Jan 2024 18:05:59 -0500 Subject: [PATCH 6/7] ENH: Improve visualizations The existing chart was not of much use. This adds 3 linked charts to a project: - Stacked bar chart showing counts for the various statuses, configurable to be displayed across different time interval buckets. - A sliding window range chart to control the view of the other two charts. - A line chart showing daily success rate. TODO: a versions filter is still needed. --- migas/static/charts.html | 388 +++++++++++++++++++++++++++++---------- 1 file changed, 291 insertions(+), 97 deletions(-) diff --git a/migas/static/charts.html b/migas/static/charts.html index bf0cf33..45d4cba 100644 --- a/migas/static/charts.html +++ b/migas/static/charts.html @@ -3,133 +3,327 @@ Project usage - + - - + + -
+
- -
-
- + +
+
+ Project Usage By: + + + +
+
+
+
+
+
+
+
+
+
+
+
+
+
+ selected out of records | Reset All +
+
+
+
+
-
+ - + + From e494deda807b8d473181080272ce8061088b6841 Mon Sep 17 00:00:00 2001 From: mathiasg Date: Fri, 2 Feb 2024 11:09:54 -0500 Subject: [PATCH 7/7] FIX: Interval selection --- migas/static/charts.html | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/migas/static/charts.html b/migas/static/charts.html index 45d4cba..6794649 100644 --- a/migas/static/charts.html +++ b/migas/static/charts.html @@ -161,8 +161,6 @@ function generateCharts(data) { - const intervalSel = document.getElementById("usage-interval"); - const interval = intervalSel.options[intervalSel.selectedIndex].value; const ndx = crossfilter(data); const all = ndx.groupAll(); @@ -174,15 +172,17 @@ const volumebyDay = dayDim.group().reduceSum(d => d.total / 100); - // make rendering dynamic intervals = { day: [dayDim, d3.timeDays], week: [weekDim, d3.timeWeeks], month: [monthDim, d3.timeMonths], year: [yearDim, d3.timeYears] } - const intervalDim = intervals[interval][0] - const intervalXAxis = intervals[interval][1] + // adjust interval based on select element + const intervalSel = document.getElementById("usage-interval"); + let interval = intervalSel.options[intervalSel.selectedIndex].value; + let intervalDim = intervals[interval][0] + let intervalXAxis = intervals[interval][1] const compGroup = intervalDim.group().reduceSum(d => d.completed); const failGroup = intervalDim.group().reduceSum(d => d.failed);