Skip to content

Commit

Permalink
ENH: Improve the usage statistics produced, refine visualization (#85)
Browse files Browse the repository at this point in the history
* ENH: Improve project usage querying

Reduces the number of queries made to the database and improves the data munging.

* FIX: Remove burdensome relationship between project/users tables

* REWORK: Enrich the data available from `usage_stats()`

* ENH: Add day format support to viz data query

* RF: Rework data structure for easier plotting

* ENH: Improve visualizations

The existing chart was not of much use.
This adds 3 linked charts to a project:
- Stacked bar chart showing counts for the various statuses, configurable to be displayed across different time interval buckets.
- A sliding window range chart to control the view of the other two charts.
- A line chart showing daily success rate.

TODO: a versions filter is still needed.

* FIX: Interval selection
  • Loading branch information
mgxd authored Feb 2, 2024
1 parent f1860ca commit 68b5dcf
Show file tree
Hide file tree
Showing 4 changed files with 410 additions and 154 deletions.
125 changes: 81 additions & 44 deletions migas/server/database.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from typing import List
import typing as ty

# from asyncpg import Record
from sqlalchemy import distinct, func, select, text
from sqlalchemy import distinct, func, select, case, desc
from sqlalchemy.dialects.postgresql import insert

from .models import Table, gen_session, get_project_tables, projects
Expand Down Expand Up @@ -144,7 +144,7 @@ async def query_usage_unique(project: Table) -> int:
return res.scalars().one()


async def query_projects() -> List[str]:
async def query_projects() -> list[str]:
async with gen_session() as session:
res = await session.execute(select(projects.c.project))
return res.scalars().all()
Expand All @@ -156,53 +156,90 @@ async def project_exists(project: str) -> bool:
return bool(res.one_or_none())


async def get_viz_data(project: str) -> list:
async def get_viz_data(
project_name: str,
version: str | None = None,
date_group: ty.Literal['day', 'week', 'month', 'year'] = 'month'
) -> list:
"""
TODO: Implement bucket sorting.
Implements the following SQL pseudocode:
- select distinct version from <project> where version not like '%+%';
- for vers in ^:
- select count(distinct session_id) from <project> where is_ci = false and version = ver;
- select count(distinct session_id) from <project> where is_ci = false and version = ver and status = 'C';
- select count(distinct user_id) from <project> where is_ci = false and version = ver;
- select count(*), date_part('isoyear', timestamp) as year, date_part('week', timestamp) as week from <project> where status = 'C' group by year, week order by year, week;
Filter project usage into groups, based on versions and dates.
"""
p, _ = await get_project_tables(project)
project, _ = await get_project_tables(project_name)

async with gen_session() as session:
# we want to return a table with:
# version | total_runs (unique session_id) | successful_runs | users (unique user_id)
# TODO: index should be applied to version, session_id, user_id columns
# TODO: this should be done within a single query

# first grab all different versions
versions = await session.execute(
select(distinct(p.c.version)).where(p.c.version.not_like('%+%'))
match date_group:
case 'day':
datefmt = 'YYYY-MM-DD'
case 'week':
datefmt = 'YYYY-WW' #?
case 'month':
datefmt = 'YYYY-MM'
case 'year':
datefmt = 'YYYY'
case _:
raise NotImplementedError

# Create a subquery to:
# - filter out version(s)
# - convert timestamps into date-bucket strings (format selected by `date_group`)
subq0 = (
select(
project.c.version,
project.c.session_id,
func.to_char(project.c.timestamp, datefmt).label('date'),
project.c.status
)
data = {v: {} for v in versions.scalars().all()}
.distinct(project.c.session_id)
.where(project.c.status != None)
)

for vers in data.keys():
total = await session.execute(
select(func.count(distinct(p.c.session_id)))
.where(p.c.is_ci == False)
.where(p.c.version == vers)
)
data[vers]['total_runs'] = total.scalar()
success = await session.execute(
select(func.count(distinct(p.c.session_id)))
.where(p.c.is_ci == False)
.where(p.c.version == vers)
.where(text("status='C'"))
)
data[vers]['successful_runs'] = success.scalar()
uusers = await session.execute(
select(func.count(distinct(p.c.user_id)))
.where(p.c.is_ci == False)
.where(p.c.version == vers)
if version:
subq0 = (
subq0.where(project.c.version == version)
)
else:
# Filter out "unofficial" versions
subq0 = (
subq0
.where(project.c.version.not_like('%+%'))
.where(project.c.version.not_like('%rc%'))
)
subq0 = subq0.subquery()


subq1 = (
select(
subq0.c.version,
subq0.c.date,
subq0.c.status,
func.count().label("status_sum")
)
.group_by(subq0.c.status, subq0.c.date, subq0.c.version)
# .order_by(subq.c.date.desc(), subq.c.version.desc())
.subquery()
)

complete = case((subq1.c.status == 'C', subq1.c.status_sum), else_=0)
failed = case((subq1.c.status == 'F', subq1.c.status_sum), else_=0)
suspended = case((subq1.c.status == 'S', subq1.c.status_sum), else_=0)
incomplete = case((subq1.c.status == 'R', subq1.c.status_sum), else_=0)

async with gen_session() as session:

# Group subquery into groups composed of:
# <version> <date> <status> <count>
date = await session.execute(
select(
subq1.c.version,
subq1.c.date,
func.max(complete).label('complete'),
func.max(failed).label('failed'),
func.max(suspended).label('suspended'),
func.max(incomplete).label('incomplete')
)
data[vers]['unique_users'] = uusers.scalar()
return data
.group_by(subq1.c.version, subq1.c.date)
.order_by(subq1.c.version.desc(), subq1.c.date.desc())
)
return date.all()


async def verify_token(token: str) -> tuple[bool, list[str]]:
Expand Down
24 changes: 13 additions & 11 deletions migas/server/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ class Authentication(Base):

async def get_project_tables(
project: str, create: bool = True
) -> tuple[Table | None, Table | None]:
) -> tuple[Table, Table]:
"""
Return `Project` and `Users` tables pertaining to input `project`.
Expand Down Expand Up @@ -96,9 +96,9 @@ async def get_project_tables(
'__tablename__': users_tablename,
},
)
# assign relationships once both are defined
ProjectModel.users = relationship(users_class_name, back_populates='project')
UsersModel.project = relationship(project_class_name, back_populates='users')
# # assign relationships once both are defined
# ProjectModel.users = relationship(users_class_name, back_populates='project')
# UsersModel.project = relationship(project_class_name, back_populates='users')

users_table = tables[users_fullname]
project_table = tables[project_fullname]
Expand All @@ -108,17 +108,19 @@ async def get_project_tables(
raise RuntimeError(f'Missing required table for {project}')

if tables_to_create:
from .connections import get_db_engine
await create_tables(tables_to_create)

engine = await get_db_engine()
return project_table, users_table

def _create_tables(conn) -> None:
return Base.metadata.create_all(conn, tables=tables_to_create)

async with engine.begin() as conn:
await conn.run_sync(_create_tables)
async def create_tables(tables: list) -> None:
    """Create the given tables through the application's database engine.

    Parameters
    ----------
    tables
        Table objects forwarded to ``Base.metadata.create_all`` as its
        ``tables`` argument.
    """
    # Imported at call time rather than module level
    # (presumably to avoid a circular import with `.connections` - confirm).
    from .connections import get_db_engine

    db_engine = await get_db_engine()
    async with db_engine.begin() as connection:
        # `create_all` is a synchronous API; `run_sync` invokes it with the
        # underlying sync connection as first argument and forwards `tables`.
        await connection.run_sync(Base.metadata.create_all, tables=tables)


async def populate_base(conn: AsyncConnection) -> None:
Expand Down
27 changes: 25 additions & 2 deletions migas/server/schema.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from collections import defaultdict
import os
import typing as ty

import strawberry
from fastapi import Request, Response
Expand Down Expand Up @@ -107,12 +109,33 @@ async def login(self, token: str) -> AuthenticationResult:
)

@strawberry.field
async def usage_stats(self, project: str, token: str) -> JSON:
async def usage_stats(
self,
project: str,
token: str,
version: str | None = None,
date_group: str = 'day', # TODO: ty.Literal incompatibility with strawberry - enum?
) -> JSON:
'Generate different usage information'
_, projects = await verify_token(token)
if project not in projects:
raise Exception('Invalid token.')
return await get_viz_data(project)
usage = await get_viz_data(project, version, date_group)

data = {'versions': [], 'grouping': date_group, 'timeseries': []}
for ver, date, comp, fail, susp, inc in usage:
if ver not in data['versions']:
data['versions'].append(ver)
data['timeseries'].append({
'version': ver,
'date': date,
'completed': comp,
'failed': fail,
'suspended': susp,
'incomplete': inc,
})

return data


@strawberry.type
Expand Down
Loading

0 comments on commit 68b5dcf

Please sign in to comment.