Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: Improve usage statistics produce, refine visualization #85

Merged
merged 7 commits into from
Feb 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
125 changes: 81 additions & 44 deletions migas/server/database.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from typing import List
import typing as ty

# from asyncpg import Record
from sqlalchemy import distinct, func, select, text
from sqlalchemy import distinct, func, select, case, desc
from sqlalchemy.dialects.postgresql import insert

from .models import Table, gen_session, get_project_tables, projects
Expand Down Expand Up @@ -144,7 +144,7 @@ async def query_usage_unique(project: Table) -> int:
return res.scalars().one()


async def query_projects() -> List[str]:
async def query_projects() -> list[str]:
async with gen_session() as session:
res = await session.execute(select(projects.c.project))
return res.scalars().all()
Expand All @@ -156,53 +156,90 @@ async def project_exists(project: str) -> bool:
return bool(res.one_or_none())


async def get_viz_data(project: str) -> list:
async def get_viz_data(
project_name: str,
version: str | None = None,
date_group: ty.Literal['day', 'week', 'month', 'year'] = 'month'
) -> list:
"""
TODO: Implement bucket sorting.

Implements the following SQL pseudocode:
- select distinct version from <project> where version not like '%+%';
- for vers in ^:
- select count(distinct session_id) from <project> where is_ci = false and version = ver;
- select count(distinct session_id) from <project> where is_ci = false and version = ver and status = 'C';
- select count(distinct user_id) from <project> where is_ci = false and version = ver;
- select count(*), date_part('isoyear', timestamp) as year, date_part('week', timestamp) as week from <project> where status = 'C' group by year, week order by year, week;
Filter project usage into groups, based on versions and dates.
"""
p, _ = await get_project_tables(project)
project, _ = await get_project_tables(project_name)

async with gen_session() as session:
# we want to return a table with:
# version | total_runs (unique session_id) | successful_runs | users (unique user_id)
# TODO: index should be applied to version, session_id, user_id columns
# TODO: this should be done within a single query

# first grab all different versions
versions = await session.execute(
select(distinct(p.c.version)).where(p.c.version.not_like('%+%'))
match date_group:
case 'day':
datefmt = 'YYYY-MM-DD'
case 'week':
datefmt = 'YYYY-WW' #?
case 'month':
datefmt = 'YYYY-MM'
case 'year':
datefmt = 'YYYY'
case _:
raise NotImplementedError

# Create a subquery to:
# - filter out version(s)
# - convert timestamps into date labels formatted according to date_group
subq0 = (
select(
project.c.version,
project.c.session_id,
func.to_char(project.c.timestamp, datefmt).label('date'),
project.c.status
)
data = {v: {} for v in versions.scalars().all()}
.distinct(project.c.session_id)
.where(project.c.status != None)
)

for vers in data.keys():
total = await session.execute(
select(func.count(distinct(p.c.session_id)))
.where(p.c.is_ci == False)
.where(p.c.version == vers)
)
data[vers]['total_runs'] = total.scalar()
success = await session.execute(
select(func.count(distinct(p.c.session_id)))
.where(p.c.is_ci == False)
.where(p.c.version == vers)
.where(text("status='C'"))
)
data[vers]['successful_runs'] = success.scalar()
uusers = await session.execute(
select(func.count(distinct(p.c.user_id)))
.where(p.c.is_ci == False)
.where(p.c.version == vers)
if version:
subq0 = (
subq0.where(project.c.version == version)
)
else:
# Filter out "unofficial" versions
subq0 = (
subq0
.where(project.c.version.not_like('%+%'))
.where(project.c.version.not_like('%rc%'))
)
subq0 = subq0.subquery()


subq1 = (
select(
subq0.c.version,
subq0.c.date,
subq0.c.status,
func.count().label("status_sum")
)
.group_by(subq0.c.status, subq0.c.date, subq0.c.version)
# .order_by(subq.c.date.desc(), subq.c.version.desc())
.subquery()
)

complete = case((subq1.c.status == 'C', subq1.c.status_sum), else_=0)
failed = case((subq1.c.status == 'F', subq1.c.status_sum), else_=0)
suspended = case((subq1.c.status == 'S', subq1.c.status_sum), else_=0)
incomplete = case((subq1.c.status == 'R', subq1.c.status_sum), else_=0)

async with gen_session() as session:

# Collapse the per-status counts into one row per (version, date):
# <version> <date> <complete> <failed> <suspended> <incomplete>
date = await session.execute(
select(
subq1.c.version,
subq1.c.date,
func.max(complete).label('complete'),
func.max(failed).label('failed'),
func.max(suspended).label('suspended'),
func.max(incomplete).label('incomplete')
)
data[vers]['unique_users'] = uusers.scalar()
return data
.group_by(subq1.c.version, subq1.c.date)
.order_by(subq1.c.version.desc(), subq1.c.date.desc())
)
return date.all()


async def verify_token(token: str) -> tuple[bool, list[str]]:
Expand Down
24 changes: 13 additions & 11 deletions migas/server/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ class Authentication(Base):

async def get_project_tables(
project: str, create: bool = True
) -> tuple[Table | None, Table | None]:
) -> tuple[Table, Table]:
"""
Return `Project` and `Users` tables pertaining to input `project`.

Expand Down Expand Up @@ -96,9 +96,9 @@ async def get_project_tables(
'__tablename__': users_tablename,
},
)
# assign relationships once both are defined
ProjectModel.users = relationship(users_class_name, back_populates='project')
UsersModel.project = relationship(project_class_name, back_populates='users')
# # assign relationships once both are defined
# ProjectModel.users = relationship(users_class_name, back_populates='project')
# UsersModel.project = relationship(project_class_name, back_populates='users')

users_table = tables[users_fullname]
project_table = tables[project_fullname]
Expand All @@ -108,17 +108,19 @@ async def get_project_tables(
raise RuntimeError(f'Missing required table for {project}')

if tables_to_create:
from .connections import get_db_engine
await create_tables(tables_to_create)

engine = await get_db_engine()
return project_table, users_table

def _create_tables(conn) -> None:
return Base.metadata.create_all(conn, tables=tables_to_create)

async with engine.begin() as conn:
await conn.run_sync(_create_tables)
async def create_tables(tables: list) -> None:
from .connections import get_db_engine

return project_table, users_table
engine = await get_db_engine()
async with engine.begin() as conn:
def _create_tables(conn) -> None:
return Base.metadata.create_all(conn, tables=tables)
await conn.run_sync(_create_tables)


async def populate_base(conn: AsyncConnection) -> None:
Expand Down
27 changes: 25 additions & 2 deletions migas/server/schema.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from collections import defaultdict
import os
import typing as ty

import strawberry
from fastapi import Request, Response
Expand Down Expand Up @@ -107,12 +109,33 @@ async def login(self, token: str) -> AuthenticationResult:
)

@strawberry.field
async def usage_stats(self, project: str, token: str) -> JSON:
async def usage_stats(
self,
project: str,
token: str,
version: str | None = None,
date_group: str = 'day', # TODO: ty.Literal incompatibility with strawberry - enum?
) -> JSON:
'Generate different usage information'
_, projects = await verify_token(token)
if project not in projects:
raise Exception('Invalid token.')
return await get_viz_data(project)
usage = await get_viz_data(project, version, date_group)

data = {'versions': [], 'grouping': date_group, 'timeseries': []}
for ver, date, comp, fail, susp, inc in usage:
if ver not in data['versions']:
data['versions'].append(ver)
data['timeseries'].append({
'version': ver,
'date': date,
'completed': comp,
'failed': fail,
'suspended': susp,
'incomplete': inc,
})

return data


@strawberry.type
Expand Down
Loading
Loading