From 68b5dcf9b6aae36135af0789111365cf4037a612 Mon Sep 17 00:00:00 2001 From: Mathias Goncalves Date: Fri, 2 Feb 2024 11:10:47 -0500 Subject: [PATCH] ENH: Improve usage statistics produce, refine visualization (#85) * ENH: Improve project usage querying Reduces the number of queries made to the database, with better data-munging. * FIX: Remove burdensome relationship between project/users tables * REWORK: Enrich data available from `usage_stats()` * ENH: Add day format support to viz data query * RF: Rework data structure for easier plotting * ENH: Improve visualizations The existing chart was not of much use. This adds 3 linked charts to a project: - Stacked bar chart showing counts for the various statuses, configurable to be displayed across different time interval buckets. - A sliding window range chart to control the view of the other two charts. - A line chart showing daily success rate. TODO: a versions filter is still needed. * FIX: Interval selection --- migas/server/database.py | 125 ++++++++----- migas/server/models.py | 24 +-- migas/server/schema.py | 27 ++- migas/static/charts.html | 388 +++++++++++++++++++++++++++++---------- 4 files changed, 410 insertions(+), 154 deletions(-) diff --git a/migas/server/database.py b/migas/server/database.py index bfaa3e3..6129fbf 100644 --- a/migas/server/database.py +++ b/migas/server/database.py @@ -1,7 +1,7 @@ -from typing import List +import typing as ty # from asyncpg import Record -from sqlalchemy import distinct, func, select, text +from sqlalchemy import distinct, func, select, case, desc from sqlalchemy.dialects.postgresql import insert from .models import Table, gen_session, get_project_tables, projects @@ -144,7 +144,7 @@ async def query_usage_unique(project: Table) -> int: return res.scalars().one() -async def query_projects() -> List[str]: +async def query_projects() -> list[str]: async with gen_session() as session: res = await session.execute(select(projects.c.project)) return res.scalars().all() @@ 
-156,53 +156,90 @@ async def project_exists(project: str) -> bool: return bool(res.one_or_none()) -async def get_viz_data(project: str) -> list: +async def get_viz_data( + project_name: str, + version: str | None = None, + date_group: ty.Literal['day', 'week', 'month', 'year'] = 'month' +) -> list: """ - TODO: Implement bucket sorting. - - Implements the following SQL pseudocode: - - select distinct version from where version not like '%+%'; - - for vers in ^: - - select count(distinct session_id) from where is_ci = false and version = ver; - - select count(distinct session_id) from where is_ci = false and version = ver and status = 'C'; - - select count(distinct user_id) from where is_ci = false and version = ver; - - select count(*), date_part('isoyear', timestamp) as year, date_part('week', timestamp) as week from where status = 'C' group by year, week order by year, week; + Filter project usage into groups, based on versions and dates. """ - p, _ = await get_project_tables(project) + project, _ = await get_project_tables(project_name) - async with gen_session() as session: - # we want to return a table with: - # version | total_runs (unique session_id) | sucessful_runs | users (unique user_id) - # TODO: index should be applied to version, session_id, user_id columns - # TODO: this should be done within a single query - - # first grab all different versions - versions = await session.execute( - select(distinct(p.c.version)).where(p.c.version.not_like('%+%')) + match date_group: + case 'day': + datefmt = 'YYYY-MM-DD' + case 'week': + datefmt = 'YYYY-WW' #? 
+ case 'month': + datefmt = 'YYYY-MM' + case 'year': + datefmt = 'YYYY' + case _: + raise NotImplementedError + + # Create a subquery to: + # - filter out version(s) + # - convert timestamps into YEAR-WEEK values + subq0 = ( + select( + project.c.version, + project.c.session_id, + func.to_char(project.c.timestamp, datefmt).label('date'), + project.c.status ) - data = {v: {} for v in versions.scalars().all()} + .distinct(project.c.session_id) + .where(project.c.status != None) + ) - for vers in data.keys(): - total = await session.execute( - select(func.count(distinct(p.c.session_id))) - .where(p.c.is_ci == False) - .where(p.c.version == vers) - ) - data[vers]['total_runs'] = total.scalar() - success = await session.execute( - select(func.count(distinct(p.c.session_id))) - .where(p.c.is_ci == False) - .where(p.c.version == vers) - .where(text("status='C'")) - ) - data[vers]['successful_runs'] = success.scalar() - uusers = await session.execute( - select(func.count(distinct(p.c.user_id))) - .where(p.c.is_ci == False) - .where(p.c.version == vers) + if version: + subq0 = ( + subq0.where(project.c.version == version) + ) + else: + # Filter out "unofficial" versions + subq0 = ( + subq0 + .where(project.c.version.not_like('%+%')) + .where(project.c.version.not_like('%rc%')) + ) + subq0 = subq0.subquery() + + + subq1 = ( + select( + subq0.c.version, + subq0.c.date, + subq0.c.status, + func.count().label("status_sum") + ) + .group_by(subq0.c.status, subq0.c.date, subq0.c.version) + # .order_by(subq.c.date.desc(), subq.c.version.desc()) + .subquery() + ) + + complete = case((subq1.c.status == 'C', subq1.c.status_sum), else_=0) + failed = case((subq1.c.status == 'F', subq1.c.status_sum), else_=0) + suspended = case((subq1.c.status == 'S', subq1.c.status_sum), else_=0) + incomplete = case((subq1.c.status == 'R', subq1.c.status_sum), else_=0) + + async with gen_session() as session: + + # Group subquery into groups composed of: + # + date = await session.execute( + select( + 
subq1.c.version, + subq1.c.date, + func.max(complete).label('complete'), + func.max(failed).label('failed'), + func.max(suspended).label('suspended'), + func.max(incomplete).label('incomplete') ) - data[vers]['unique_users'] = uusers.scalar() - return data + .group_by(subq1.c.version, subq1.c.date) + .order_by(subq1.c.version.desc(), subq1.c.date.desc()) + ) + return date.all() async def verify_token(token: str) -> tuple[bool, list[str]]: diff --git a/migas/server/models.py b/migas/server/models.py index d543b18..d0dc884 100644 --- a/migas/server/models.py +++ b/migas/server/models.py @@ -59,7 +59,7 @@ class Authentication(Base): async def get_project_tables( project: str, create: bool = True -) -> tuple[Table | None, Table | None]: +) -> tuple[Table, Table]: """ Return `Project` and `Users` tables pertaining to input `project`. @@ -96,9 +96,9 @@ async def get_project_tables( '__tablename__': users_tablename, }, ) - # assign relationships once both are defined - ProjectModel.users = relationship(users_class_name, back_populates='project') - UsersModel.project = relationship(project_class_name, back_populates='users') + # # assign relationships once both are defined + # ProjectModel.users = relationship(users_class_name, back_populates='project') + # UsersModel.project = relationship(project_class_name, back_populates='users') users_table = tables[users_fullname] project_table = tables[project_fullname] @@ -108,17 +108,19 @@ async def get_project_tables( raise RuntimeError(f'Missing required table for {project}') if tables_to_create: - from .connections import get_db_engine + await create_tables(tables_to_create) - engine = await get_db_engine() + return project_table, users_table - def _create_tables(conn) -> None: - return Base.metadata.create_all(conn, tables=tables_to_create) - async with engine.begin() as conn: - await conn.run_sync(_create_tables) +async def create_tables(tables: list) -> None: + from .connections import get_db_engine - return project_table, 
users_table + engine = await get_db_engine() + async with engine.begin() as conn: + def _create_tables(conn) -> None: + return Base.metadata.create_all(conn, tables=tables) + await conn.run_sync(_create_tables) async def populate_base(conn: AsyncConnection) -> None: diff --git a/migas/server/schema.py b/migas/server/schema.py index 6ce2d4d..7d36c51 100644 --- a/migas/server/schema.py +++ b/migas/server/schema.py @@ -1,4 +1,6 @@ +from collections import defaultdict import os +import typing as ty import strawberry from fastapi import Request, Response @@ -107,12 +109,33 @@ async def login(self, token: str) -> AuthenticationResult: ) @strawberry.field - async def usage_stats(self, project: str, token: str) -> JSON: + async def usage_stats( + self, + project: str, + token: str, + version: str | None = None, + date_group: str = 'day', # TODO: ty.Literal incompatibility with strawberry - enum? + ) -> JSON: 'Generate different usage information' _, projects = await verify_token(token) if project not in projects: raise Exception('Invalid token.') - return await get_viz_data(project) + usage = await get_viz_data(project, version, date_group) + + data = {'versions': [], 'grouping': date_group, 'timeseries': []} + for ver, date, comp, fail, susp, inc in usage: + if ver not in data['versions']: + data['versions'].append(ver) + data['timeseries'].append({ + 'version': ver, + 'date': date, + 'completed': comp, + 'failed': fail, + 'suspended': susp, + 'incomplete': inc, + }) + + return data @strawberry.type diff --git a/migas/static/charts.html b/migas/static/charts.html index bf0cf33..6794649 100644 --- a/migas/static/charts.html +++ b/migas/static/charts.html @@ -3,133 +3,327 @@ Project usage - + - - + + -
+
- -
-
- + +
+
+ Project Usage By: + + + +
+
+
+
+
+
+
+
+
+
+
+
+
+
+ selected out of records | Reset All +
+
+
+
+
-
+ - + +