From 54daf1c43df0345dc6c8f251548fc66252adb889 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Tue, 8 Aug 2023 15:03:10 -0700 Subject: [PATCH 01/17] add charts endpoints --- carbonplan_offsets_db/main.py | 3 ++- carbonplan_offsets_db/routers/charts.py | 35 +++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) create mode 100644 carbonplan_offsets_db/routers/charts.py diff --git a/carbonplan_offsets_db/main.py b/carbonplan_offsets_db/main.py index 83b178d..58f8a91 100644 --- a/carbonplan_offsets_db/main.py +++ b/carbonplan_offsets_db/main.py @@ -7,7 +7,7 @@ from .app_metadata import metadata from .logging import get_logger from .models import Credit, Project -from .routers import credits, files, health, projects +from .routers import charts, credits, files, health, projects from .settings import get_settings from .tasks import calculate_totals, export_table_to_csv, update_credit_stats, update_project_stats @@ -33,6 +33,7 @@ def create_application() -> FastAPI: application.include_router(health.router, prefix='/health', tags=['health']) application.include_router(projects.router, prefix='/projects', tags=['projects']) application.include_router(credits.router, prefix='/credits', tags=['credits']) + application.include_router(charts.router, prefix='/charts', tags=['charts']) application.include_router(files.router, prefix='/files', tags=['files']) return application diff --git a/carbonplan_offsets_db/routers/charts.py b/carbonplan_offsets_db/routers/charts.py new file mode 100644 index 0000000..669fea2 --- /dev/null +++ b/carbonplan_offsets_db/routers/charts.py @@ -0,0 +1,35 @@ +from fastapi import APIRouter, Depends, Request +from sqlmodel import Session, text + +from ..database import get_session +from ..logging import get_logger + +router = APIRouter() +logger = get_logger() + + +@router.get('/project_registration') +def get_project_registration(request: Request, session: Session = Depends(get_session)): + """Get project registration data""" + logger.info(f'Getting project registration data: {request.url}') + stmt = text( + """ + SELECT + width_bucket( + extract(year FROM age(now(), registered_at)), + 0, + 2, + 15 + ) AS bin, + count(*) + FROM + project + GROUP BY + bin + ORDER BY + bin + """ + ) + + result = session.execute(stmt) + return result.all() From 042f87b999c3ff3adc0756878928be4ecc30b5aa Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Tue, 8 Aug 2023 17:07:40 -0700 Subject: [PATCH 02/17] add function for getting binned data --- carbonplan_offsets_db/routers/charts.py | 76 ++++++++++++++++++------- 1 file changed, 55 insertions(+), 21 deletions(-) diff --git a/carbonplan_offsets_db/routers/charts.py b/carbonplan_offsets_db/routers/charts.py index 669fea2..0914f9e 100644 --- a/carbonplan_offsets_db/routers/charts.py +++ b/carbonplan_offsets_db/routers/charts.py @@ -1,35 +1,69 @@ +import datetime + from fastapi import APIRouter, Depends, Request -from sqlmodel import Session, text +from sqlmodel import Session, and_, case, func from ..database import get_session from ..logging import get_logger +from ..models import Project router = APIRouter() logger = get_logger() +def get_binned_data(session, num_bins): + # Get the min and max date from registered_at + min_date, max_date = session.query( + func.min(Project.registered_at), func.max(Project.registered_at) + ).one() + + # Calculate the bin width + bin_width = (max_date - min_date) / num_bins + + # Define the binning logic using a combination of the CASE statement and basic arithmetic + conditions = [ + ( + and_( + Project.registered_at >= min_date + i * bin_width, + Project.registered_at < min_date + (i + 1) * bin_width, + ), + f'{(min_date + i*bin_width).year}-{(min_date + (i+1)*bin_width).year}', + ) + for i in range(num_bins - 1) + ] + last_bin_label = ( + f'{(min_date + (num_bins-1)*bin_width).year}-present' + if max_date.year == datetime.datetime.now().year + else f'{(min_date + (num_bins-1)*bin_width).year}-{max_date.year}' + ) + conditions.append( + (Project.registered_at >= min_date + (num_bins - 1) * bin_width, last_bin_label) + ) + + binned_date = case(conditions, else_='other').label('bin') + + # Query with the binning logic + binned_results = ( + session.query(binned_date, Project.category, func.count(Project.id).label('count')) + .group_by(binned_date, Project.category) + .all() + ) + + total_projects = session.query(Project).count() + total_binned_counts = sum(result[2] for result in binned_results) + + if total_projects != total_binned_counts: + raise ValueError( + f'Total projects ({total_projects}) does not match sum of binned counts ({total_binned_counts}).' + ) + + return binned_results + + @router.get('/project_registration') def get_project_registration(request: Request, session: Session = Depends(get_session)): """Get project registration data""" logger.info(f'Getting project registration data: {request.url}') - stmt = text( - """ - SELECT - width_bucket( - extract(year FROM age(now(), registered_at)), - 0, - 2, - 15 - ) AS bin, - count(*) - FROM - project - GROUP BY - bin - ORDER BY - bin - """ - ) - result = session.execute(stmt) - return result.all() + results = get_binned_data(session, 15) + return results From 46cc12f30bbc8a5e68aed923b3c6e18acb9903b2 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Tue, 8 Aug 2023 17:12:15 -0700 Subject: [PATCH 03/17] add num_bins to query parameters --- carbonplan_offsets_db/routers/charts.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/carbonplan_offsets_db/routers/charts.py b/carbonplan_offsets_db/routers/charts.py index 0914f9e..6a1a35c 100644 --- a/carbonplan_offsets_db/routers/charts.py +++ b/carbonplan_offsets_db/routers/charts.py @@ -1,6 +1,6 @@ import datetime -from fastapi import APIRouter, Depends, Request +from fastapi import APIRouter, Depends, Query, Request from sqlmodel import Session, and_, case, func from ..database import get_session @@ -11,7 +11,7 @@ logger = get_logger() -def get_binned_data(session, num_bins): +def get_binned_data(*, session, num_bins): # Get the min and max date from registered_at min_date, max_date = session.query( func.min(Project.registered_at), func.max(Project.registered_at) @@ -61,9 +61,12 @@ def get_binned_data(session, num_bins): @router.get('/project_registration') -def get_project_registration(request: Request, session: Session = Depends(get_session)): - """Get project registration data""" +def get_project_registration( + request: Request, + num_bins: int = Query(15, description='The number of bins'), + session: Session = Depends(get_session), +): + """Get aggregated project registration data""" logger.info(f'Getting project registration data: {request.url}') - results = get_binned_data(session, 15) - return results + return get_binned_data(session=session, num_bins=num_bins) From 1800b76204cf8d62c218a9fea582e0f86ec3c209 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Tue, 8 Aug 2023 17:26:00 -0700 Subject: [PATCH 04/17] add comments --- carbonplan_offsets_db/routers/charts.py | 29 +++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/carbonplan_offsets_db/routers/charts.py b/carbonplan_offsets_db/routers/charts.py index 6a1a35c..1e2a56e 100644 --- a/carbonplan_offsets_db/routers/charts.py +++ b/carbonplan_offsets_db/routers/charts.py @@ -12,15 +12,32 @@ def get_binned_data(*, session, num_bins): - # Get the min and max date from registered_at + """ + This function bins the projects based on their registration date and groups them by category. + + Parameters + ---------- + session: Session + SQLAlchemy session for querying the database. + num_bins: int, + Number of bins to divide the registration dates into. + + Returns + ------- + binned_results: list + A list of tuples, each containing the bin label, category, and count of projects. + """ + + # Determine the earliest and latest registration dates in the database. min_date, max_date = session.query( func.min(Project.registered_at), func.max(Project.registered_at) ).one() - # Calculate the bin width + # Calculate the width of each bin by dividing the total date range by the number of bins. bin_width = (max_date - min_date) / num_bins - # Define the binning logic using a combination of the CASE statement and basic arithmetic + # Create conditions for each bin. Each condition checks if a project's registration date + # falls within the range defined by a bin's start and end dates. Also, assign a label for each bin. conditions = [ ( and_( @@ -31,6 +48,8 @@ def get_binned_data(*, session, num_bins): ) for i in range(num_bins - 1) ] + + # Handle the last bin separately to account for the possibility that the max_date is the current year. last_bin_label = ( f'{(min_date + (num_bins-1)*bin_width).year}-present' if max_date.year == datetime.datetime.now().year @@ -40,15 +59,17 @@ def get_binned_data(*, session, num_bins): (Project.registered_at >= min_date + (num_bins - 1) * bin_width, last_bin_label) ) + # Using the conditions, generate a CASE statement to assign a bin label to each project. binned_date = case(conditions, else_='other').label('bin') - # Query with the binning logic + # Execute the main query, grouping projects by bin and category, and counting the number of projects in each group. binned_results = ( session.query(binned_date, Project.category, func.count(Project.id).label('count')) .group_by(binned_date, Project.category) .all() ) + # Validate that the sum of counts from the binned results matches the total number of projects in the database. total_projects = session.query(Project).count() total_binned_counts = sum(result[2] for result in binned_results) From 1282e830830c0aebd47eddacd0b789a74611771a Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Wed, 9 Aug 2023 14:05:26 -0700 Subject: [PATCH 05/17] remove the special `max_date.year` case --- carbonplan_offsets_db/routers/charts.py | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/carbonplan_offsets_db/routers/charts.py b/carbonplan_offsets_db/routers/charts.py index 1e2a56e..6739333 100644 --- a/carbonplan_offsets_db/routers/charts.py +++ b/carbonplan_offsets_db/routers/charts.py @@ -1,5 +1,3 @@ -import datetime - from fastapi import APIRouter, Depends, Query, Request from sqlmodel import Session, and_, case, func @@ -46,19 +44,9 @@ def get_binned_data(*, session, num_bins): ), f'{(min_date + i*bin_width).year}-{(min_date + (i+1)*bin_width).year}', ) - for i in range(num_bins - 1) + for i in range(num_bins) ] - # Handle the last bin separately to account for the possibility that the max_date is the current year. - last_bin_label = ( - f'{(min_date + (num_bins-1)*bin_width).year}-present' - if max_date.year == datetime.datetime.now().year - else f'{(min_date + (num_bins-1)*bin_width).year}-{max_date.year}' - ) - conditions.append( - (Project.registered_at >= min_date + (num_bins - 1) * bin_width, last_bin_label) - ) - # Using the conditions, generate a CASE statement to assign a bin label to each project. binned_date = case(conditions, else_='other').label('bin') From 03c8fec16699c369a1101cb2479d4237223da7e4 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Wed, 9 Aug 2023 14:29:37 -0700 Subject: [PATCH 06/17] return start and end instead of concatenated bin string per @katamartin's feedback --- carbonplan_offsets_db/models.py | 7 +++++++ carbonplan_offsets_db/routers/charts.py | 22 ++++++++++++++++------ 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/carbonplan_offsets_db/models.py b/carbonplan_offsets_db/models.py index e002370..9725c1d 100644 --- a/carbonplan_offsets_db/models.py +++ b/carbonplan_offsets_db/models.py @@ -139,3 +139,10 @@ class CreditStatsWithPagination(pydantic.BaseModel): class ProjectStatsWithPagination(pydantic.BaseModel): pagination: Pagination data: list[ProjectStats] + + +class ProjectBinnedRegistration(pydantic.BaseModel): + start: int | None + end: int | None + category: str | None + count: int | None diff --git a/carbonplan_offsets_db/routers/charts.py b/carbonplan_offsets_db/routers/charts.py index 6739333..c837349 100644 --- a/carbonplan_offsets_db/routers/charts.py +++ b/carbonplan_offsets_db/routers/charts.py @@ -3,7 +3,7 @@ from ..database import get_session from ..logging import get_logger -from ..models import Project +from ..models import Project, ProjectBinnedRegistration router = APIRouter() logger = get_logger() @@ -26,6 +26,7 @@ def get_binned_data(*, session, num_bins): A list of tuples, each containing the bin label, category, and count of projects. """ + logger.info('📊 Generating binned data...') # Determine the earliest and latest registration dates in the database. min_date, max_date = session.query( func.min(Project.registered_at), func.max(Project.registered_at) @@ -53,23 +54,32 @@ def get_binned_data(*, session, num_bins): # Execute the main query, grouping projects by bin and category, and counting the number of projects in each group. binned_results = ( session.query(binned_date, Project.category, func.count(Project.id).label('count')) - .group_by(binned_date, Project.category) + .group_by('bin', Project.category) .all() ) # Validate that the sum of counts from the binned results matches the total number of projects in the database. total_projects = session.query(Project).count() total_binned_counts = sum(result[2] for result in binned_results) - if total_projects != total_binned_counts: + logger.error('❌ Mismatch in total counts!') raise ValueError( - f'Total projects ({total_projects}) does not match sum of binned counts ({total_binned_counts}).' + f"Total projects ({total_projects}) doesn't match sum of binned counts ({total_binned_counts})." + ) + + # Reformat results to a more concise representation. + formatted_results = [] + for bin_label, category, count in binned_results: + start, end = ( + (int(part) for part in bin_label.split('-')) if '-' in bin_label else (None, None) ) + formatted_results.append({'start': start, 'end': end, 'category': category, 'count': count}) - return binned_results + logger.info('✅ Binned data generated successfully!') + return formatted_results -@router.get('/project_registration') +@router.get('/project_registration', response_model=list[ProjectBinnedRegistration]) def get_project_registration( request: Request, num_bins: int = Query(15, description='The number of bins'), From a563c5de4556b56ec3ad9304ae9361e5b65fdfff Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Wed, 9 Aug 2023 14:59:54 -0700 Subject: [PATCH 07/17] add filters --- carbonplan_offsets_db/routers/charts.py | 102 ++++++++++++++++++++++-- 1 file changed, 95 insertions(+), 7 deletions(-) diff --git a/carbonplan_offsets_db/routers/charts.py b/carbonplan_offsets_db/routers/charts.py index c837349..3c4a190 100644 --- a/carbonplan_offsets_db/routers/charts.py +++ b/carbonplan_offsets_db/routers/charts.py @@ -1,15 +1,19 @@ +import datetime + from fastapi import APIRouter, Depends, Query, Request -from sqlmodel import Session, and_, case, func +from sqlmodel import Session, and_, case, func, or_ from ..database import get_session from ..logging import get_logger from ..models import Project, ProjectBinnedRegistration +from ..query_helpers import apply_filters +from ..schemas import Registries router = APIRouter() logger = get_logger() -def get_binned_data(*, session, num_bins): +def get_binned_data(*, session, num_bins, projects=None): """ This function bins the projects based on their registration date and groups them by category. @@ -19,6 +23,8 @@ def get_binned_data(*, session, num_bins): SQLAlchemy session for querying the database. num_bins: int, Number of bins to divide the registration dates into. + projects: list, optional + List of projects to be binned. If not provided, the function will query the entire Project table. Returns ------- @@ -27,10 +33,21 @@ def get_binned_data(*, session, num_bins): """ logger.info('📊 Generating binned data...') - # Determine the earliest and latest registration dates in the database. - min_date, max_date = session.query( - func.min(Project.registered_at), func.max(Project.registered_at) - ).one() + if projects: + # Extract dates from provided projects, filtering out None values + registration_dates = [ + project.registered_at for project in projects if project.registered_at is not None + ] + if not registration_dates: + logger.error('❌ No valid registration dates found!') + raise ValueError('Provided projects have no valid registration dates.') + min_date = min(registration_dates) + max_date = max(registration_dates) + else: + # Determine the earliest and latest registration dates in the database. + min_date, max_date = session.query( + func.min(Project.registered_at), func.max(Project.registered_at) + ).one() # Calculate the width of each bin by dividing the total date range by the number of bins. bin_width = (max_date - min_date) / num_bins @@ -83,9 +100,80 @@ def get_binned_data(*, session, num_bins): def get_project_registration( request: Request, num_bins: int = Query(15, description='The number of bins'), + registry: list[Registries] | None = Query(None, description='Registry name'), + country: list[str] | None = Query(None, description='Country name'), + protocol: list[str] | None = Query(None, description='Protocol name'), + category: list[str] | None = Query(None, description='Category name'), + is_arb: bool | None = Query(None, description='Whether project is an ARB project'), + registered_at_from: datetime.date + | datetime.datetime + | None = Query(default=None, description='Format: YYYY-MM-DD'), + registered_at_to: datetime.date + | datetime.datetime + | None = Query(default=None, description='Format: YYYY-MM-DD'), + started_at_from: datetime.date + | datetime.datetime + | None = Query(default=None, description='Format: YYYY-MM-DD'), + started_at_to: datetime.date + | datetime.datetime + | None = Query(default=None, description='Format: YYYY-MM-DD'), + issued_min: int | None = Query(None, description='Minimum number of issued credits'), + issued_max: int | None = Query(None, description='Maximum number of issued credits'), + retired_min: int | None = Query(None, description='Minimum number of retired credits'), + retired_max: int | None = Query(None, description='Maximum number of retired credits'), + search: str + | None = Query( + None, + description='Case insensitive search string. Currently searches on `project_id` and `name` fields only.', + ), session: Session = Depends(get_session), ): """Get aggregated project registration data""" logger.info(f'Getting project registration data: {request.url}') - return get_binned_data(session=session, num_bins=num_bins) + query = session.query(Project) + + # Apply filters + filterable_attributes = [ + ('registry', registry, 'ilike'), + ('country', country, 'ilike'), + ('protocol', protocol, 'ilike'), + ('category', category, 'ilike'), + ] + + for attribute, values, operation in filterable_attributes: + query = apply_filters( + query=query, model=Project, attribute=attribute, values=values, operation=operation + ) + + other_filters = [ + ('is_arb', is_arb, '=='), + ('registered_at', registered_at_from, '>='), + ('registered_at', registered_at_to, '<='), + ('started_at', started_at_from, '>='), + ('started_at', started_at_to, '<='), + ('issued', issued_min, '>='), + ('issued', issued_max, '<='), + ('retired', retired_min, '>='), + ('retired', retired_max, '<='), + ] + + for attribute, values, operation in other_filters: + query = apply_filters( + query=query, model=Project, attribute=attribute, values=values, operation=operation + ) + + if search: + search_pattern = f'%{search}%' + query = query.filter( + or_(Project.project_id.ilike(search_pattern), Project.name.ilike(search_pattern)) + ) + + # Fetch filtered projects for binning + filtered_projects = query.all() + # Check if the filtered projects list is empty + if not filtered_projects: + logger.warning('⚠️ No projects found matching the filtering criteria!') + return [] + + return get_binned_data(session=session, num_bins=num_bins, projects=filtered_projects) From 5a0b33253dc8d6f4c8e9c479ac77e5fc511fd27b Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Wed, 9 Aug 2023 15:26:27 -0700 Subject: [PATCH 08/17] generalize get_binned_data --- carbonplan_offsets_db/routers/charts.py | 109 ++++++++++++++---------- 1 file changed, 66 insertions(+), 43 deletions(-) diff --git a/carbonplan_offsets_db/routers/charts.py b/carbonplan_offsets_db/routers/charts.py index 3c4a190..cf33edd 100644 --- a/carbonplan_offsets_db/routers/charts.py +++ b/carbonplan_offsets_db/routers/charts.py @@ -13,69 +13,88 @@ logger = get_logger() -def get_binned_data(*, session, num_bins, projects=None): +def get_binned_data(*, session, num_bins, binning_attribute, projects=None): """ - This function bins the projects based on their registration date and groups them by category. + This function bins the projects based on a specified attribute and groups them by category. Parameters ---------- session: Session SQLAlchemy session for querying the database. - num_bins: int, - Number of bins to divide the registration dates into. + num_bins: int + Number of bins to divide the data into. + binning_attribute: str + Attribute name of the Project model to be used for binning (e.g., 'registered_at' or 'issued'). projects: list, optional List of projects to be binned. If not provided, the function will query the entire Project table. Returns ------- binned_results: list - A list of tuples, each containing the bin label, category, and count of projects. + A list of dictionaries, each containing the bin start, end, category, and count of projects. """ - logger.info('📊 Generating binned data...') + logger.info(f'📊 Generating binned data based on {binning_attribute}...') + + # Dynamically get the attribute from the Project model based on the provided binning_attribute + attribute = getattr(Project, binning_attribute) + + # If projects are provided, extract values for the given binning_attribute. Otherwise, query the database. if projects: - # Extract dates from provided projects, filtering out None values - registration_dates = [ - project.registered_at for project in projects if project.registered_at is not None + values = [ + getattr(project, binning_attribute) + for project in projects + if getattr(project, binning_attribute) is not None ] - if not registration_dates: - logger.error('❌ No valid registration dates found!') - raise ValueError('Provided projects have no valid registration dates.') - min_date = min(registration_dates) - max_date = max(registration_dates) + if not values: + logger.error(f'❌ No valid values found for attribute {binning_attribute}!') + raise ValueError(f'Provided projects have no valid values for {binning_attribute}.') + min_value, max_value = min(values), max(values) else: - # Determine the earliest and latest registration dates in the database. - min_date, max_date = session.query( - func.min(Project.registered_at), func.max(Project.registered_at) - ).one() - - # Calculate the width of each bin by dividing the total date range by the number of bins. - bin_width = (max_date - min_date) / num_bins - - # Create conditions for each bin. Each condition checks if a project's registration date - # falls within the range defined by a bin's start and end dates. Also, assign a label for each bin. - conditions = [ - ( - and_( - Project.registered_at >= min_date + i * bin_width, - Project.registered_at < min_date + (i + 1) * bin_width, - ), - f'{(min_date + i*bin_width).year}-{(min_date + (i+1)*bin_width).year}', - ) - for i in range(num_bins) - ] + # Get the minimum and maximum values for the attribute from the database + min_value, max_value = session.query(func.min(attribute), func.max(attribute)).one() + + # Calculate the width for each bin + bin_width = (max_value - min_value) / num_bins + + # Create conditions for each bin. These conditions will determine which bin a project falls into. + # Check if the binning attribute is a date type + if isinstance(min_value, datetime.date | datetime.datetime): + # Create conditions for each bin. These conditions will determine which bin a project falls into for date attributes. + conditions = [ + ( + and_( + attribute >= min_value + datetime.timedelta(days=i * bin_width.days), + attribute < min_value + datetime.timedelta(days=(i + 1) * bin_width.days), + ), + f'{(min_value + datetime.timedelta(days=i * bin_width.days)).year}-{(min_value + datetime.timedelta(days=(i + 1) * bin_width.days)).year}', + ) + for i in range(num_bins) + ] + else: + # Create conditions for each bin. These conditions will determine which bin a project falls into for numerical attributes. + conditions = [ + ( + and_( + attribute >= min_value + i * bin_width, + attribute < min_value + (i + 1) * bin_width, + ), + f'{min_value + i*bin_width}-{min_value + (i+1)*bin_width}', + ) + for i in range(num_bins) + ] # Using the conditions, generate a CASE statement to assign a bin label to each project. - binned_date = case(conditions, else_='other').label('bin') + binned_attribute = case(conditions, else_='other').label('bin') - # Execute the main query, grouping projects by bin and category, and counting the number of projects in each group. + # Query the database, grouping by the calculated bin and category. Count the number of projects in each group. binned_results = ( - session.query(binned_date, Project.category, func.count(Project.id).label('count')) + session.query(binned_attribute, Project.category, func.count(Project.id).label('count')) .group_by('bin', Project.category) .all() ) - # Validate that the sum of counts from the binned results matches the total number of projects in the database. + # Validate that the counts from binned results match the total number of projects. total_projects = session.query(Project).count() total_binned_counts = sum(result[2] for result in binned_results) if total_projects != total_binned_counts: @@ -84,12 +103,11 @@ def get_binned_data(*, session, num_bins, projects=None): f"Total projects ({total_projects}) doesn't match sum of binned counts ({total_binned_counts})." ) - # Reformat results to a more concise representation. + # Reformat results to be more user-friendly formatted_results = [] for bin_label, category, count in binned_results: - start, end = ( - (int(part) for part in bin_label.split('-')) if '-' in bin_label else (None, None) - ) + logger.info(bin_label) + start, end = (part for part in bin_label.split('-')) if '-' in bin_label else (None, None) formatted_results.append({'start': start, 'end': end, 'category': category, 'count': count}) logger.info('✅ Binned data generated successfully!') @@ -176,4 +194,9 @@ def get_project_registration( logger.warning('⚠️ No projects found matching the filtering criteria!') return [] - return get_binned_data(session=session, num_bins=num_bins, projects=filtered_projects) + return get_binned_data( + session=session, + num_bins=num_bins, + binning_attribute='registered_at', + projects=filtered_projects, + ) From 971b2157a274f037b90f43d4e4c686a2055911b5 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Wed, 9 Aug 2023 16:26:05 -0700 Subject: [PATCH 09/17] add issuance_totals endpoint --- carbonplan_offsets_db/models.py | 8 +- carbonplan_offsets_db/routers/charts.py | 128 +++++++++++++++++++++--- 2 files changed, 116 insertions(+), 20 deletions(-) diff --git a/carbonplan_offsets_db/models.py b/carbonplan_offsets_db/models.py index 9725c1d..deff289 100644 --- a/carbonplan_offsets_db/models.py +++ b/carbonplan_offsets_db/models.py @@ -141,8 +141,8 @@ class ProjectStatsWithPagination(pydantic.BaseModel): data: list[ProjectStats] -class ProjectBinnedRegistration(pydantic.BaseModel): - start: int | None - end: int | None +class ProjectBinnedData(pydantic.BaseModel): + start: int | float | None + end: int | float | None category: str | None - count: int | None + value: int | float | None diff --git a/carbonplan_offsets_db/routers/charts.py b/carbonplan_offsets_db/routers/charts.py index cf33edd..d4e074d 100644 --- a/carbonplan_offsets_db/routers/charts.py +++ b/carbonplan_offsets_db/routers/charts.py @@ -5,7 +5,7 @@ from ..database import get_session from ..logging import get_logger -from ..models import Project, ProjectBinnedRegistration +from ..models import Project, ProjectBinnedData from ..query_helpers import apply_filters from ..schemas import Registries @@ -88,33 +88,41 @@ def get_binned_data(*, session, num_bins, binning_attribute, projects=None): binned_attribute = case(conditions, else_='other').label('bin') # Query the database, grouping by the calculated bin and category. Count the number of projects in each group. - binned_results = ( - session.query(binned_attribute, Project.category, func.count(Project.id).label('count')) - .group_by('bin', Project.category) - .all() - ) + if binning_attribute == 'issued': + query = session.query( + binned_attribute, Project.category, func.sum(Project.issued).label('value') + ) + total_values = session.query(func.sum(Project.issued)).scalar() + + else: + query = session.query( + binned_attribute, Project.category, func.count(Project.id).label('value') + ) + total_values = session.query(func.count(Project.id)).scalar() + binned_results = query.group_by('bin', Project.category).all() # Validate that the counts from binned results match the total number of projects. - total_projects = session.query(Project).count() - total_binned_counts = sum(result[2] for result in binned_results) - if total_projects != total_binned_counts: - logger.error('❌ Mismatch in total counts!') + total_binned_values = sum(result[2] for result in binned_results) + logger.info(f'Total values: {total_values}, Total binned values: {total_binned_values}') + if total_values != total_binned_values: + logger.error('❌ Mismatch in total values!') raise ValueError( - f"Total projects ({total_projects}) doesn't match sum of binned counts ({total_binned_counts})." + f"Total values ({total_values}) doesn't match sum of binned values ({total_binned_values})." ) # Reformat results to be more user-friendly formatted_results = [] - for bin_label, category, count in binned_results: - logger.info(bin_label) - start, end = (part for part in bin_label.split('-')) if '-' in bin_label else (None, None) - formatted_results.append({'start': start, 'end': end, 'category': category, 'count': count}) + for bin_label, category, value in binned_results: + start, end = iter(bin_label.split('-')) if '-' in bin_label else (None, None) + if start and end: + start, end = int(float(start)), int(float(end)) + formatted_results.append({'start': start, 'end': end, 'category': category, 'value': value}) logger.info('✅ Binned data generated successfully!') return formatted_results -@router.get('/project_registration', response_model=list[ProjectBinnedRegistration]) +@router.get('/project_registration', response_model=list[ProjectBinnedData]) def get_project_registration( request: Request, num_bins: int = Query(15, description='The number of bins'), @@ -200,3 +208,91 @@ def get_project_registration( binning_attribute='registered_at', projects=filtered_projects, ) + + +@router.get('/issuance_totals', response_model=list[ProjectBinnedData]) +def get_issuance_totals( + request: Request, + num_bins: int = Query(15, description='The number of bins'), + registry: list[Registries] | None = Query(None, description='Registry name'), + country: list[str] | None = Query(None, description='Country name'), + protocol: list[str] | None = Query(None, description='Protocol name'), + category: list[str] | None = Query(None, description='Category name'), + is_arb: bool | None = Query(None, description='Whether project is an ARB project'), + registered_at_from: datetime.date + | datetime.datetime + | None = Query(default=None, description='Format: YYYY-MM-DD'), + registered_at_to: datetime.date + | datetime.datetime + | None = Query(default=None, description='Format: YYYY-MM-DD'), + started_at_from: datetime.date + | datetime.datetime + | None = Query(default=None, description='Format: YYYY-MM-DD'), + started_at_to: datetime.date + | datetime.datetime + | None = Query(default=None, description='Format: YYYY-MM-DD'), + issued_min: int | None = Query(None, description='Minimum number of issued credits'), + issued_max: int | None = Query(None, description='Maximum number of issued credits'), + retired_min: int | None = Query(None, description='Minimum number of retired credits'), + retired_max: int | None = Query(None, description='Maximum number of retired credits'), + search: str + | None = Query( + None, + description='Case insensitive search string. Currently searches on `project_id` and `name` fields only.', + ), + session: Session = Depends(get_session), +): + """Get aggregated project registration data""" + logger.info(f'Getting project registration data: {request.url}') + + query = session.query(Project) + + # Apply filters + filterable_attributes = [ + ('registry', registry, 'ilike'), + ('country', country, 'ilike'), + ('protocol', protocol, 'ilike'), + ('category', category, 'ilike'), + ] + + for attribute, values, operation in filterable_attributes: + query = apply_filters( + query=query, model=Project, attribute=attribute, values=values, operation=operation + ) + + other_filters = [ + ('is_arb', is_arb, '=='), + ('registered_at', registered_at_from, '>='), + ('registered_at', registered_at_to, '<='), + ('started_at', started_at_from, '>='), + ('started_at', started_at_to, '<='), + ('issued', issued_min, '>='), + ('issued', issued_max, '<='), + ('retired', retired_min, '>='), + ('retired', retired_max, '<='), + ] + + for attribute, values, operation in other_filters: + query = apply_filters( + query=query, model=Project, attribute=attribute, values=values, operation=operation + ) + + if search: + search_pattern = f'%{search}%' + query = query.filter( + or_(Project.project_id.ilike(search_pattern), Project.name.ilike(search_pattern)) + ) + + # Fetch filtered projects for binning + filtered_projects = query.all() + # Check if the filtered projects list is empty + if not filtered_projects: + logger.warning('⚠️ No projects found matching the filtering criteria!') + return [] + + return get_binned_data( + session=session, + num_bins=num_bins, + binning_attribute='issued', + projects=filtered_projects, + ) From c3cb32c1aeba5efa47fdce777b555b8d96dec50e Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Wed, 9 Aug 2023 16:29:35 -0700 Subject: [PATCH 10/17] rename models --- carbonplan_offsets_db/models.py | 15 +++++++++++---- carbonplan_offsets_db/routers/charts.py | 6 +++--- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/carbonplan_offsets_db/models.py b/carbonplan_offsets_db/models.py index deff289..7060f86 100644 --- a/carbonplan_offsets_db/models.py +++ b/carbonplan_offsets_db/models.py @@ -141,8 +141,15 @@ class ProjectStatsWithPagination(pydantic.BaseModel): data: list[ProjectStats] -class ProjectBinnedData(pydantic.BaseModel): - start: int | float | None - end: int | float | None +class ProjectBinnedRegistration(pydantic.BaseModel): + start: int | None + end: int | None category: str | None - value: int | float | None + value: int | None + + +class ProjectBinnedIssuanceTotals(pydantic.BaseModel): + start: float | None + end: float | None + category: str | None + value: float | None diff --git a/carbonplan_offsets_db/routers/charts.py b/carbonplan_offsets_db/routers/charts.py index d4e074d..12d8932 100644 --- a/carbonplan_offsets_db/routers/charts.py +++ b/carbonplan_offsets_db/routers/charts.py @@ -5,7 +5,7 @@ from ..database import get_session from ..logging import get_logger -from ..models import Project, ProjectBinnedData +from ..models import Project, ProjectBinnedIssuanceTotals, ProjectBinnedRegistration from ..query_helpers import apply_filters from ..schemas import Registries @@ -122,7 +122,7 @@ def get_binned_data(*, session, num_bins, binning_attribute, projects=None): return formatted_results -@router.get('/project_registration', response_model=list[ProjectBinnedData]) +@router.get('/project_registration', response_model=list[ProjectBinnedRegistration]) def get_project_registration( request: Request, num_bins: int = Query(15, description='The number of bins'), @@ -210,7 +210,7 @@ def get_project_registration( ) -@router.get('/issuance_totals', response_model=list[ProjectBinnedData]) +@router.get('/issuance_totals', response_model=list[ProjectBinnedIssuanceTotals]) def get_issuance_totals( request: Request, num_bins: int = Query(15, description='The number of bins'), From 7ff1a3092de005b0aea575a10107476e5b2f1f23 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 10 Aug 2023 16:45:08 -0700 Subject: [PATCH 11/17] update binning --- carbonplan_offsets_db/models.py | 4 +- carbonplan_offsets_db/routers/charts.py | 167 ++++++++++-------------- 2 files changed, 68 insertions(+), 103 deletions(-) diff --git a/carbonplan_offsets_db/models.py b/carbonplan_offsets_db/models.py index 7060f86..c88238f 100644 --- a/carbonplan_offsets_db/models.py +++ b/carbonplan_offsets_db/models.py @@ -142,8 +142,8 @@ class ProjectStatsWithPagination(pydantic.BaseModel): class ProjectBinnedRegistration(pydantic.BaseModel): - start: int | None - end: int | None + start: datetime.date | None + end: datetime.date | None category: str | None value: int | None diff --git a/carbonplan_offsets_db/routers/charts.py b/carbonplan_offsets_db/routers/charts.py index 12d8932..78de4d5 100644 --- a/carbonplan_offsets_db/routers/charts.py +++ b/carbonplan_offsets_db/routers/charts.py @@ -1,5 +1,7 @@ import datetime +import typing +import pandas as pd from fastapi import APIRouter, Depends, Query, Request from sqlmodel import Session, and_, case, func, or_ @@ -13,119 +15,91 @@ logger = get_logger() -def get_binned_data(*, session, num_bins, binning_attribute, projects=None): - """ - This function bins the projects based on a specified attribute and groups them by category. +def generate_date_bins(*, min_value, max_value, freq: typing.Literal['D', 'M', 'Y']): + # Determine the end-of-period date based on the frequency + if freq == 'D': # Daily frequency + end_of_period = pd.Timestamp(max_value) + elif freq == 'M': # Monthly frequency + end_of_period = ( + pd.Timestamp(max_value).replace(day=1) + pd.DateOffset(months=1) - pd.DateOffset(days=1) + ) + elif freq == 'Y': # Yearly frequency + end_of_period = pd.Timestamp(max_value).replace(month=12, day=31) + else: + raise ValueError("Unsupported frequency. Use 'D', 'M', or 'Y'.") + + # Generate date bins with the specified frequency + date_bins = pd.date_range(start=min_value, end=max_value, freq=freq) + + # Ensure that the end-of-period date is included + if len(date_bins) == 0 or date_bins[-1] != end_of_period: + date_bins = date_bins.append(pd.DatetimeIndex([end_of_period])) - Parameters - ---------- - session: Session - SQLAlchemy session for querying the database. - num_bins: int - Number of bins to divide the data into. - binning_attribute: str - Attribute name of the Project model to be used for binning (e.g., 'registered_at' or 'issued'). - projects: list, optional - List of projects to be binned. If not provided, the function will query the entire Project table. + return date_bins - Returns - ------- - binned_results: list - A list of dictionaries, each containing the bin start, end, category, and count of projects. - """ +def get_binned_data(*, query, binning_attribute): logger.info(f'📊 Generating binned data based on {binning_attribute}...') # Dynamically get the attribute from the Project model based on the provided binning_attribute attribute = getattr(Project, binning_attribute) + min_value, max_value = query.with_entities(func.min(attribute), func.max(attribute)).one() - # If projects are provided, extract values for the given binning_attribute. Otherwise, query the database. - if projects: - values = [ - getattr(project, binning_attribute) - for project in projects - if getattr(project, binning_attribute) is not None - ] - if not values: - logger.error(f'❌ No valid values found for attribute {binning_attribute}!') - raise ValueError(f'Provided projects have no valid values for {binning_attribute}.') - min_value, max_value = min(values), max(values) - else: - # Get the minimum and maximum values for the attribute from the database - min_value, max_value = session.query(func.min(attribute), func.max(attribute)).one() + logger.info(f'📊 Min value: {min_value}, max value: {max_value}') - # Calculate the width for each bin - bin_width = (max_value - min_value) / num_bins + # Check if the binning attribute is a date type and create yearly bins using pandas date_range + if isinstance(min_value, datetime.date) or isinstance(min_value, datetime.datetime): + date_bins = generate_date_bins(min_value=min_value, max_value=max_value, freq='Y') + + logger.info(f'📅 Binning by date with {date_bins} bins...') - # Create conditions for each bin. These conditions will determine which bin a project falls into. - # Check if the binning attribute is a date type - if isinstance(min_value, datetime.date | datetime.datetime): - # Create conditions for each bin. These conditions will determine which bin a project falls into for date attributes. - conditions = [ - ( - and_( - attribute >= min_value + datetime.timedelta(days=i * bin_width.days), - attribute < min_value + datetime.timedelta(days=(i + 1) * bin_width.days), - ), - f'{(min_value + datetime.timedelta(days=i * bin_width.days)).year}-{(min_value + datetime.timedelta(days=(i + 1) * bin_width.days)).year}', - ) - for i in range(num_bins) - ] - else: - # Create conditions for each bin. These conditions will determine which bin a project falls into for numerical attributes. conditions = [ ( and_( - attribute >= min_value + i * bin_width, - attribute < min_value + (i + 1) * bin_width, + attribute >= date_bins[i], + attribute < date_bins[i + 1], ), - f'{min_value + i*bin_width}-{min_value + (i+1)*bin_width}', + str(date_bins[i].year), ) - for i in range(num_bins) + for i in range(len(date_bins) - 1) ] - # Using the conditions, generate a CASE statement to assign a bin label to each project. - binned_attribute = case(conditions, else_='other').label('bin') - - # Query the database, grouping by the calculated bin and category. Count the number of projects in each group. - if binning_attribute == 'issued': - query = session.query( - binned_attribute, Project.category, func.sum(Project.issued).label('value') - ) - total_values = session.query(func.sum(Project.issued)).scalar() - - else: - query = session.query( - binned_attribute, Project.category, func.count(Project.id).label('value') + # Check if there are any conditions + if conditions: + binned_attribute = case(conditions, else_='other').label('bin') + elif len(date_bins) == 1: + binned_attribute = func.concat(date_bins[0].year).label( + 'bin' + ) # Use concat to return a string literal + else: + binned_attribute = 'other' + + # Query the database, grouping by the calculated bin and category. Count the number of projects in each group. + query = query.with_entities( + binned_attribute, Project.category, func.count(Project.project_id).label('value') ) - total_values = session.query(func.count(Project.id)).scalar() - binned_results = query.group_by('bin', Project.category).all() - - # Validate that the counts from binned results match the total number of projects. - total_binned_values = sum(result[2] for result in binned_results) - logger.info(f'Total values: {total_values}, Total binned values: {total_binned_values}') - if total_values != total_binned_values: - logger.error('❌ Mismatch in total values!') - raise ValueError( - f"Total values ({total_values}) doesn't match sum of binned values ({total_binned_values})." - ) - - # Reformat results to be more user-friendly - formatted_results = [] - for bin_label, category, value in binned_results: - start, end = iter(bin_label.split('-')) if '-' in bin_label else (None, None) - if start and end: - start, end = int(float(start)), int(float(end)) - formatted_results.append({'start': start, 'end': end, 'category': category, 'value': value}) + binned_results = query.group_by('bin', Project.category).all() + # Reformat results to be more user-friendly + formatted_results = [] + for bin_label, category, value in binned_results: + if bin_label == 'other': + start, end = None, None + else: + start, end = datetime.date(int(bin_label), 1, 1), datetime.date( + int(bin_label) + 1, 1, 1 + ) + formatted_results.append( + ProjectBinnedRegistration(start=start, end=end, category=category, value=value) + ) - logger.info('✅ Binned data generated successfully!') - return formatted_results + logger.info('✅ Binned data generated successfully!') + return formatted_results + return [] -@router.get('/project_registration', response_model=list[ProjectBinnedRegistration]) -def get_project_registration( +@router.get('/projects_by_registration_date', response_model=list[ProjectBinnedRegistration]) +def get_projects_by_registration_date( request: Request, - num_bins: int = Query(15, description='The number of bins'), registry: list[Registries] | None = Query(None, description='Registry name'), country: list[str] | None = Query(None, description='Country name'), protocol: list[str] | None = Query(None, description='Protocol name'), @@ -195,18 +169,9 @@ def get_project_registration( or_(Project.project_id.ilike(search_pattern), Project.name.ilike(search_pattern)) ) - # Fetch filtered projects for binning - filtered_projects = query.all() - # Check if the filtered projects list is empty - if not filtered_projects: - logger.warning('⚠️ No projects found matching the filtering criteria!') - return [] - return get_binned_data( - session=session, - num_bins=num_bins, binning_attribute='registered_at', - projects=filtered_projects, + query=query, ) From a141dd71efa351faf737d96fd930dd7cca96f260 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 10 Aug 2023 17:42:48 -0700 Subject: [PATCH 12/17] refactor --- carbonplan_offsets_db/routers/charts.py | 123 ++++++++++++------------ 1 file changed, 59 insertions(+), 64 deletions(-) diff --git a/carbonplan_offsets_db/routers/charts.py b/carbonplan_offsets_db/routers/charts.py index 78de4d5..33a2af6 100644 --- a/carbonplan_offsets_db/routers/charts.py +++ b/carbonplan_offsets_db/routers/charts.py @@ -3,7 +3,7 @@ import pandas as pd from fastapi import APIRouter, Depends, Query, Request -from sqlmodel import Session, and_, case, func, or_ +from sqlmodel import Session, and_, case, func, or_, text from ..database import get_session from ..logging import get_logger @@ -15,91 +15,89 @@ logger = get_logger() -def generate_date_bins(*, min_value, max_value, freq: typing.Literal['D', 'M', 'Y']): - # Determine the end-of-period date based on the frequency - if freq == 'D': # Daily frequency - end_of_period = pd.Timestamp(max_value) - elif freq == 'M': # Monthly frequency +def calculate_end_date(start_date, freq): + """Calculate the end date based on the start date and frequency.""" + if freq == 'D': + return start_date + pd.DateOffset(days=1) + elif freq == 'W': + return start_date + pd.DateOffset(weeks=1) + elif freq == 'M': + return start_date + pd.DateOffset(months=1) + pd.offsets.MonthEnd(0) + else: # freq == 'Y' + return start_date + pd.DateOffset(years=1) + pd.offsets.YearEnd(0) + + +def generate_date_bins(*, min_value, max_value, freq: typing.Literal['D', 'W', 'M', 'Y']): + """Generate date bins with the specified frequency.""" + end_of_period = pd.Timestamp(max_value) + if freq == 'M': end_of_period = ( - pd.Timestamp(max_value).replace(day=1) + pd.DateOffset(months=1) - pd.DateOffset(days=1) + end_of_period.replace(day=1) + pd.DateOffset(months=1) - pd.DateOffset(days=1) ) - elif freq == 'Y': # Yearly frequency - end_of_period = pd.Timestamp(max_value).replace(month=12, day=31) - else: - raise ValueError("Unsupported frequency. Use 'D', 'M', or 'Y'.") + elif freq == 'Y': + end_of_period = end_of_period.replace(month=12, day=31) - # Generate date bins with the specified frequency date_bins = pd.date_range(start=min_value, end=max_value, freq=freq) - # Ensure that the end-of-period date is included + # Ensure the last date is included if len(date_bins) == 0 or date_bins[-1] != end_of_period: date_bins = date_bins.append(pd.DatetimeIndex([end_of_period])) + logger.info(f'📅 Binning by date with {len(date_bins)} bins...') return date_bins -def get_binned_data(*, query, binning_attribute): +def get_binned_data(*, query, binning_attribute, freq='Y'): + """Generate binned data based on the given attribute and frequency.""" logger.info(f'📊 Generating binned data based on {binning_attribute}...') - - # Dynamically get the attribute from the Project model based on the provided binning_attribute attribute = getattr(Project, binning_attribute) min_value, max_value = query.with_entities(func.min(attribute), func.max(attribute)).one() - logger.info(f'📊 Min value: {min_value}, max value: {max_value}') + date_bins = generate_date_bins(min_value=min_value, max_value=max_value, freq=freq) - # Check if the binning attribute is a date type and create yearly bins using pandas date_range - if isinstance(min_value, datetime.date) or isinstance(min_value, datetime.datetime): - date_bins = generate_date_bins(min_value=min_value, max_value=max_value, freq='Y') + # Create conditions for each bin + conditions = [ + ( + and_(attribute >= date_bins[i], attribute < date_bins[i + 1]), + str(date_bins[i].date()), + ) + for i in range(len(date_bins) - 1) + ] - logger.info(f'📅 Binning by date with {date_bins} bins...') + # Define the binned attribute + if conditions: + binned_attribute = case(conditions, else_='other').label('bin') + elif len(date_bins) == 1: + binned_attribute = func.concat(date_bins[0].date()).label( + 'bin' + ) # Use concat to return a string literal + else: + binned_attribute = text('other') # Explicitly declare the text literal - conditions = [ - ( - and_( - attribute >= date_bins[i], - attribute < date_bins[i + 1], - ), - str(date_bins[i].year), + # Query and format the results + query = query.with_entities( + binned_attribute, Project.category, func.count(Project.project_id).label('value') + ) + binned_results = query.group_by('bin', Project.category).all() + + formatted_results = [] + for bin_label, category, value in binned_results: + start_date = pd.Timestamp(bin_label) if bin_label != 'other' else None + end_date = calculate_end_date(start_date, freq).date() if start_date else None + formatted_results.append( + ProjectBinnedRegistration( + start=start_date, end=end_date, category=category, value=value ) - for i in range(len(date_bins) - 1) - ] - - # Check if there are any conditions - if conditions: - binned_attribute = case(conditions, else_='other').label('bin') - elif len(date_bins) == 1: - binned_attribute = func.concat(date_bins[0].year).label( - 'bin' - ) # Use concat to return a string literal - else: - binned_attribute = 'other' - - # Query the database, grouping by the calculated bin and category. Count the number of projects in each group. - query = query.with_entities( - binned_attribute, Project.category, func.count(Project.project_id).label('value') ) - binned_results = query.group_by('bin', Project.category).all() - # Reformat results to be more user-friendly - formatted_results = [] - for bin_label, category, value in binned_results: - if bin_label == 'other': - start, end = None, None - else: - start, end = datetime.date(int(bin_label), 1, 1), datetime.date( - int(bin_label) + 1, 1, 1 - ) - formatted_results.append( - ProjectBinnedRegistration(start=start, end=end, category=category, value=value) - ) - logger.info('✅ Binned data generated successfully!') - return formatted_results - return [] + logger.info('✅ Binned data generated successfully!') + return formatted_results @router.get('/projects_by_registration_date', response_model=list[ProjectBinnedRegistration]) def get_projects_by_registration_date( request: Request, + freq: typing.Literal['D', 'W', 'M', 'Y'] = Query('Y', description='Frequency of bins'), registry: list[Registries] | None = Query(None, description='Registry name'), country: list[str] | None = Query(None, description='Country name'), protocol: list[str] | None = Query(None, description='Protocol name'), @@ -169,10 +167,7 @@ def get_projects_by_registration_date( or_(Project.project_id.ilike(search_pattern), Project.name.ilike(search_pattern)) ) - return get_binned_data( - binning_attribute='registered_at', - query=query, - ) + return get_binned_data(binning_attribute='registered_at', query=query, freq=freq) @router.get('/issuance_totals', response_model=list[ProjectBinnedIssuanceTotals]) From 5833f3845dd0d986a0c036d28d49d655e9664e8d Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 10 Aug 2023 18:22:31 -0700 Subject: [PATCH 13/17] fix conditions --- carbonplan_offsets_db/routers/charts.py | 41 +++++++++++++++---------- 1 file changed, 24 insertions(+), 17 deletions(-) diff --git a/carbonplan_offsets_db/routers/charts.py b/carbonplan_offsets_db/routers/charts.py index 33a2af6..ec318aa 100644 --- a/carbonplan_offsets_db/routers/charts.py +++ b/carbonplan_offsets_db/routers/charts.py @@ -3,7 +3,7 @@ import pandas as pd from fastapi import APIRouter, Depends, Query, Request -from sqlmodel import Session, and_, case, func, or_, text +from sqlmodel import Session, and_, case, func, or_ from ..database import get_session from ..logging import get_logger @@ -53,26 +53,33 @@ def get_binned_data(*, query, binning_attribute, freq='Y'): attribute = getattr(Project, binning_attribute) min_value, max_value = query.with_entities(func.min(attribute), func.max(attribute)).one() + if min_value is None or max_value is None: + logger.info('✅ No data to bin!') + return [] + date_bins = generate_date_bins(min_value=min_value, max_value=max_value, freq=freq) - # Create conditions for each bin - conditions = [ - ( - and_(attribute >= date_bins[i], attribute < date_bins[i + 1]), - str(date_bins[i].date()), + conditions = [] + # Handle the case of exactly one non-null date bin + if len(date_bins) == 1: + conditions.append((attribute.isnot(None), func.concat(date_bins[0].date()))) + + # Handle the case of multiple non-null date bins + else: + conditions.extend( + [ + ( + and_(attribute >= date_bins[i], attribute < date_bins[i + 1]), + str(date_bins[i].date()), + ) + for i in range(len(date_bins) - 1) + ] ) - for i in range(len(date_bins) - 1) - ] + # Add condition for null registration dates + conditions.append((attribute.is_(None), 'null')) # Define the binned attribute - if conditions: - binned_attribute = case(conditions, else_='other').label('bin') - elif len(date_bins) == 1: - binned_attribute = func.concat(date_bins[0].date()).label( - 'bin' - ) # Use concat to return a string literal - else: - binned_attribute = text('other') # Explicitly declare the text literal + binned_attribute = case(conditions, else_='other').label('bin') # Query and format the results query = query.with_entities( @@ -82,7 +89,7 @@ def get_binned_data(*, query, binning_attribute, freq='Y'): formatted_results = [] for bin_label, category, value in binned_results: - start_date = pd.Timestamp(bin_label) if bin_label != 'other' else None + start_date = pd.Timestamp(bin_label) if bin_label not in ['other', 'null'] else None end_date = calculate_end_date(start_date, freq).date() if start_date else None formatted_results.append( ProjectBinnedRegistration( From 9ef1a2966495964a17f8f84c298cab3aa0da1755 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Fri, 11 Aug 2023 01:18:39 -0700 Subject: [PATCH 14/17] add credits_by_issuance_table --- carbonplan_offsets_db/routers/charts.py | 206 ++++++++++++++++++------ 1 file changed, 161 insertions(+), 45 deletions(-) diff --git a/carbonplan_offsets_db/routers/charts.py b/carbonplan_offsets_db/routers/charts.py index ec318aa..02edfb4 100644 --- a/carbonplan_offsets_db/routers/charts.py +++ b/carbonplan_offsets_db/routers/charts.py @@ -1,13 +1,14 @@ import datetime import typing +import numpy as np import pandas as pd from fastapi import APIRouter, Depends, Query, Request -from sqlmodel import Session, and_, case, func, or_ +from sqlmodel import Session, and_, case, func from ..database import get_session from ..logging import get_logger -from ..models import Project, ProjectBinnedIssuanceTotals, ProjectBinnedRegistration +from ..models import Credit, Project, ProjectBinnedIssuanceTotals, ProjectBinnedRegistration from ..query_helpers import apply_filters from ..schemas import Registries @@ -22,31 +23,73 @@ def calculate_end_date(start_date, freq): elif freq == 'W': return start_date + pd.DateOffset(weeks=1) elif freq == 'M': - return start_date + pd.DateOffset(months=1) + pd.offsets.MonthEnd(0) + return start_date + pd.DateOffset(months=1) - pd.DateOffset(days=1) else: # freq == 'Y' - return start_date + pd.DateOffset(years=1) + pd.offsets.YearEnd(0) + return start_date + pd.DateOffset(years=1) - pd.DateOffset(days=1) def generate_date_bins(*, min_value, max_value, freq: typing.Literal['D', 'W', 'M', 'Y']): """Generate date bins with the specified frequency.""" + start_of_period = pd.Timestamp(min_value) end_of_period = pd.Timestamp(max_value) if freq == 'M': end_of_period = ( end_of_period.replace(day=1) + pd.DateOffset(months=1) - pd.DateOffset(days=1) ) elif freq == 'Y': + start_of_period = start_of_period.replace(month=1, day=1) # Start of the year end_of_period = end_of_period.replace(month=12, day=31) - date_bins = pd.date_range(start=min_value, end=max_value, freq=freq) + frequency_mapping = {'Y': 'AS', 'M': 'MS', 'W': 'W', 'D': 'D'} + + logger.info( + f'📅 Binning by date with {freq} frequency, start_period: {start_of_period}, end_of_period: {end_of_period}' + ) + + date_bins = pd.date_range( + start=start_of_period, end=end_of_period, freq=frequency_mapping[freq] + ) # Ensure the last date is included if len(date_bins) == 0 or date_bins[-1] != end_of_period: date_bins = date_bins.append(pd.DatetimeIndex([end_of_period])) - logger.info(f'📅 Binning by date with {len(date_bins)} bins...') + logger.info(f'📅 Binning by date with {len(date_bins)} bins...: {date_bins}') return date_bins +def generate_dynamic_numeric_bins(*, min_value, max_value, bin_width=None): + """Generate numeric bins with dynamically adjusted bin width.""" + # Check for edge cases where min and max are the same + if min_value == max_value: + return np.array([min_value]) + + if bin_width is None: + # Calculate the range and order of magnitude + value_range = max_value - min_value + order_of_magnitude = int(np.floor(np.log10(value_range))) + + # Determine the bin width based on the order of magnitude + if order_of_magnitude < 2: + bin_width = 10 # Tens for very small ranges + elif order_of_magnitude < 3: + bin_width = 100 # Hundreds for small ranges + elif order_of_magnitude < 4: + bin_width = 1000 # Thousands for lower moderate ranges + elif order_of_magnitude < 5: + bin_width = 10000 # Ten thousands for upper moderate ranges + elif order_of_magnitude < 6: + bin_width = 100000 # Hundred thousands for large ranges + else: + bin_width = 1000000 # Millions for very large ranges + + # Generate evenly spaced values using the determined bin width + numeric_bins = np.arange(min_value, max_value + bin_width, bin_width) + + logger.info(f'🔢 Binning by numeric value with {len(numeric_bins)} bins, width: {bin_width}...') + return numeric_bins + + def get_binned_data(*, query, binning_attribute, freq='Y'): """Generate binned data based on the given attribute and frequency.""" logger.info(f'📊 Generating binned data based on {binning_attribute}...') @@ -101,6 +144,112 @@ def get_binned_data(*, query, binning_attribute, freq='Y'): return formatted_results +def get_binned_numeric_data(*, query, binning_attribute): + """Generate binned data based on the given numeric attribute.""" + logger.info(f'📊 Generating binned data based on {binning_attribute}...') + attribute = getattr(Credit, binning_attribute) + min_value, max_value = query.with_entities(func.min(attribute), func.max(attribute)).one() + + if min_value is None or max_value is None: + logger.info('✅ No data to bin!') + return [] + + numeric_bins = generate_dynamic_numeric_bins(min_value=min_value, max_value=max_value) + + conditions = [] + # Handle the case of exactly one non-null bin + if len(numeric_bins) == 1: + conditions.append((attribute.isnot(None), str(int(numeric_bins[0])))) + + # Handle the case of multiple non-null bins + else: + conditions.extend( + [ + ( + and_(attribute >= int(numeric_bins[i]), attribute < int(numeric_bins[i + 1])), + str(int(numeric_bins[i])), + ) + for i in range(len(numeric_bins) - 1) + ] + ) + # Add condition for null attributes + conditions.append((attribute.is_(None), 'null')) + + # Define the binned attribute + binned_attribute = case(conditions, else_='other').label('bin') + + # Query and format the results + query = query.with_entities( + binned_attribute, Project.category, func.sum(Credit.quantity).label('value') + ) + binned_results = query.group_by('bin', Project.category).all() + + formatted_results = [] + for bin_label, category, value in binned_results: + start_value = float(bin_label) if bin_label not in ['other', 'null'] else None + end_value = start_value + 1 if start_value else None + formatted_results.append( + ProjectBinnedIssuanceTotals( + start=start_value, end=end_value, category=category, value=value + ) + ) + + logger.info('✅ Binned data generated successfully!') + return formatted_results + + +def credits_by_issuance_date(*, query, freq='Y'): + """Generate binned data based on the issuance date.""" + logger.info('📊 Generating binned data based on issuance date...') + + # Extract the minimum and maximum transaction_date + min_date, max_date = query.with_entities( + func.min(Credit.transaction_date), func.max(Credit.transaction_date) + ).one() + + if min_date is None or max_date is None: + logger.info('✅ No data to bin!') + return [] + + # Generate date bins based on the frequency + date_bins = generate_date_bins(min_value=min_date, max_value=max_date, freq=freq) + + # Create conditions for binning + conditions = [ + ( + and_( + Credit.transaction_date >= date_bins[i], Credit.transaction_date < date_bins[i + 1] + ), + str(date_bins[i].date()), + ) + for i in range(len(date_bins) - 1) + ] + conditions.append((Credit.transaction_date.is_(None), 'null')) + + # Define the binned attribute + binned_attribute = case(conditions, else_='other').label('bin') + + # Query and format the results + query = query.with_entities( + binned_attribute, Project.category, func.sum(Credit.quantity).label('value') + ).group_by('bin', Project.category) + + binned_results = query.all() + + formatted_results = [] + for bin_label, category, value in binned_results: + start_date = pd.Timestamp(bin_label) if bin_label not in ['other', 'null'] else None + end_date = calculate_end_date(start_date, freq).date() if start_date else None + formatted_results.append( + ProjectBinnedRegistration( + start=start_date, end=end_date, category=category, value=value + ) + ) + + logger.info('✅ Binned data generated successfully!') + return formatted_results + + @router.get('/projects_by_registration_date', response_model=list[ProjectBinnedRegistration]) def get_projects_by_registration_date( request: Request, @@ -126,11 +275,6 @@ def get_projects_by_registration_date( issued_max: int | None = Query(None, description='Maximum number of issued credits'), retired_min: int | None = Query(None, description='Minimum number of retired credits'), retired_max: int | None = Query(None, description='Maximum number of retired credits'), - search: str - | None = Query( - None, - description='Case insensitive search string. Currently searches on `project_id` and `name` fields only.', - ), session: Session = Depends(get_session), ): """Get aggregated project registration data""" @@ -168,19 +312,13 @@ def get_projects_by_registration_date( query=query, model=Project, attribute=attribute, values=values, operation=operation ) - if search: - search_pattern = f'%{search}%' - query = query.filter( - or_(Project.project_id.ilike(search_pattern), Project.name.ilike(search_pattern)) - ) - return get_binned_data(binning_attribute='registered_at', query=query, freq=freq) -@router.get('/issuance_totals', response_model=list[ProjectBinnedIssuanceTotals]) -def get_issuance_totals( +@router.get('/credits_by_issuance_date', response_model=list[dict]) +def get_credits_by_issuance_date( request: Request, - num_bins: int = Query(15, description='The number of bins'), + freq: typing.Literal['D', 'W', 'M', 'Y'] = Query('Y', description='Frequency of bins'), registry: list[Registries] | None = Query(None, description='Registry name'), country: list[str] | None = Query(None, description='Country name'), protocol: list[str] | None = Query(None, description='Protocol name'), @@ -202,17 +340,13 @@ def get_issuance_totals( issued_max: int | None = Query(None, description='Maximum number of issued credits'), retired_min: int | None = Query(None, description='Minimum number of retired credits'), retired_max: int | None = Query(None, description='Maximum number of retired credits'), - search: str - | None = Query( - None, - description='Case insensitive search string. Currently searches on `project_id` and `name` fields only.', - ), session: Session = Depends(get_session), ): """Get aggregated project registration data""" logger.info(f'Getting project registration data: {request.url}') - query = session.query(Project) + # join Credit with Project on project_id + query = session.query(Credit).join(Project, Credit.project_id == Project.project_id) # Apply filters filterable_attributes = [ @@ -244,22 +378,4 @@ def get_issuance_totals( query=query, model=Project, attribute=attribute, values=values, operation=operation ) - if search: - search_pattern = f'%{search}%' - query = query.filter( - or_(Project.project_id.ilike(search_pattern), Project.name.ilike(search_pattern)) - ) - - # Fetch filtered projects for binning - filtered_projects = query.all() - # Check if the filtered projects list is empty - if not filtered_projects: - logger.warning('⚠️ No projects found matching the filtering criteria!') - return [] - - return get_binned_data( - session=session, - num_bins=num_bins, - binning_attribute='issued', - projects=filtered_projects, - ) + return credits_by_issuance_date(query=query, freq=freq) From 415f1b44e2be0c2f6be3c969827eae75faf1e484 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Fri, 11 Aug 2023 01:36:31 -0700 Subject: [PATCH 15/17] refactor functions --- carbonplan_offsets_db/routers/charts.py | 70 +++++++++++++++---------- 1 file changed, 42 insertions(+), 28 deletions(-) diff --git a/carbonplan_offsets_db/routers/charts.py b/carbonplan_offsets_db/routers/charts.py index 02edfb4..a47bf7f 100644 --- a/carbonplan_offsets_db/routers/charts.py +++ b/carbonplan_offsets_db/routers/charts.py @@ -18,43 +18,57 @@ def calculate_end_date(start_date, freq): """Calculate the end date based on the start date and frequency.""" - if freq == 'D': - return start_date + pd.DateOffset(days=1) - elif freq == 'W': - return start_date + pd.DateOffset(weeks=1) - elif freq == 'M': - return start_date + pd.DateOffset(months=1) - pd.DateOffset(days=1) - else: # freq == 'Y' - return start_date + pd.DateOffset(years=1) - pd.DateOffset(days=1) - - -def generate_date_bins(*, min_value, max_value, freq: typing.Literal['D', 'W', 'M', 'Y']): - """Generate date bins with the specified frequency.""" - start_of_period = pd.Timestamp(min_value) - end_of_period = pd.Timestamp(max_value) - if freq == 'M': - end_of_period = ( - end_of_period.replace(day=1) + pd.DateOffset(months=1) - pd.DateOffset(days=1) - ) - elif freq == 'Y': - start_of_period = start_of_period.replace(month=1, day=1) # Start of the year - end_of_period = end_of_period.replace(month=12, day=31) + offset_mapping = { + 'D': pd.DateOffset(days=1), + 'W': pd.DateOffset(weeks=1), + 'M': pd.DateOffset(months=1), + 'Y': pd.DateOffset(years=1), + } + + end_date = start_date + offset_mapping[freq] + if freq in ['M', 'Y']: + end_date -= pd.DateOffset(days=1) + + return end_date + + +def generate_date_bins(min_value, max_value, freq: typing.Literal['D', 'W', 'M', 'Y']): + """ + Generate date bins with the specified frequency. + + Parameters + ---------- + min_value : datetime.date + The minimum date value. + max_value : datetime.date + The maximum date value. + freq : str + The frequency for binning. Can be 'D', 'W', 'M', or 'Y'. + + Returns + ------- + pd.DatetimeIndex + The generated date bins. + """ frequency_mapping = {'Y': 'AS', 'M': 'MS', 'W': 'W', 'D': 'D'} - - logger.info( - f'📅 Binning by date with {freq} frequency, start_period: {start_of_period}, end_of_period: {end_of_period}' - ) - date_bins = pd.date_range( - start=start_of_period, end=end_of_period, freq=frequency_mapping[freq] + start=pd.Timestamp(min_value), end=pd.Timestamp(max_value), freq=frequency_mapping[freq] ) # Ensure the last date is included + if freq == 'M': + end_of_period = ( + pd.Timestamp(max_value).replace(day=1) + pd.DateOffset(months=1) - pd.DateOffset(days=1) + ) + elif freq == 'Y': + end_of_period = pd.Timestamp(max_value).replace(month=12, day=31) + else: + end_of_period = pd.Timestamp(max_value) + if len(date_bins) == 0 or date_bins[-1] != end_of_period: date_bins = date_bins.append(pd.DatetimeIndex([end_of_period])) - logger.info(f'📅 Binning by date with {len(date_bins)} bins...: {date_bins}') return date_bins From 11ec5249189d8305e392d9e8117b1a6bae4a0036 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Fri, 11 Aug 2023 11:07:03 -0700 Subject: [PATCH 16/17] fix date bins --- carbonplan_offsets_db/routers/charts.py | 102 +++++++++++------------- 1 file changed, 48 insertions(+), 54 deletions(-) diff --git a/carbonplan_offsets_db/routers/charts.py b/carbonplan_offsets_db/routers/charts.py index a47bf7f..0e20c8f 100644 --- a/carbonplan_offsets_db/routers/charts.py +++ b/carbonplan_offsets_db/routers/charts.py @@ -52,22 +52,17 @@ def generate_date_bins(min_value, max_value, freq: typing.Literal['D', 'W', 'M', The generated date bins. """ frequency_mapping = {'Y': 'AS', 'M': 'MS', 'W': 'W', 'D': 'D'} + min_value, max_value = pd.Timestamp(min_value), pd.Timestamp(max_value) date_bins = pd.date_range( - start=pd.Timestamp(min_value), end=pd.Timestamp(max_value), freq=frequency_mapping[freq] + start=pd.Timestamp(min_value).replace(month=1, day=1), + end=pd.Timestamp(max_value).replace(month=12, day=31), + freq=frequency_mapping[freq], + normalize=True, ) # Ensure the last date is included - if freq == 'M': - end_of_period = ( - pd.Timestamp(max_value).replace(day=1) + pd.DateOffset(months=1) - pd.DateOffset(days=1) - ) - elif freq == 'Y': - end_of_period = pd.Timestamp(max_value).replace(month=12, day=31) - else: - end_of_period = pd.Timestamp(max_value) - - if len(date_bins) == 0 or date_bins[-1] != end_of_period: - date_bins = date_bins.append(pd.DatetimeIndex([end_of_period])) + if len(date_bins) == 0 or date_bins[-1] < max_value: + date_bins = date_bins.append(pd.DatetimeIndex([max_value.replace(month=12, day=31)])) return date_bins @@ -229,15 +224,23 @@ def credits_by_issuance_date(*, query, freq='Y'): date_bins = generate_date_bins(min_value=min_date, max_value=max_date, freq=freq) # Create conditions for binning - conditions = [ - ( - and_( - Credit.transaction_date >= date_bins[i], Credit.transaction_date < date_bins[i + 1] - ), - str(date_bins[i].date()), + conditions = [] + # Handle the case of exactly one non-null date bin + if len(date_bins) == 1: + conditions.append((Credit.transaction_date.isnot(None), str(date_bins[0].date()))) + else: + conditions.extend( + [ + ( + and_( + Credit.transaction_date >= date_bins[i], + Credit.transaction_date < date_bins[i + 1], + ), + str(date_bins[i].date()), + ) + for i in range(len(date_bins) - 1) + ] ) - for i in range(len(date_bins) - 1) - ] conditions.append((Credit.transaction_date.is_(None), 'null')) # Define the binned attribute @@ -334,26 +337,16 @@ def get_credits_by_issuance_date( request: Request, freq: typing.Literal['D', 'W', 'M', 'Y'] = Query('Y', description='Frequency of bins'), registry: list[Registries] | None = Query(None, description='Registry name'), - country: list[str] | None = Query(None, description='Country name'), - protocol: list[str] | None = Query(None, description='Protocol name'), category: list[str] | None = Query(None, description='Category name'), is_arb: bool | None = Query(None, description='Whether project is an ARB project'), - registered_at_from: datetime.date + transaction_type: list[str] | None = Query(None, description='Transaction type'), + vintage: list[int] | None = Query(None, description='Vintage'), + transaction_date_from: datetime.date | datetime.datetime | None = Query(default=None, description='Format: YYYY-MM-DD'), - registered_at_to: datetime.date + transaction_date_to: datetime.date | datetime.datetime | None = Query(default=None, description='Format: YYYY-MM-DD'), - started_at_from: datetime.date - | datetime.datetime - | None = Query(default=None, description='Format: YYYY-MM-DD'), - started_at_to: datetime.date - | datetime.datetime - | None = Query(default=None, description='Format: YYYY-MM-DD'), - issued_min: int | None = Query(None, description='Minimum number of issued credits'), - issued_max: int | None = Query(None, description='Maximum number of issued credits'), - retired_min: int | None = Query(None, description='Minimum number of retired credits'), - retired_max: int | None = Query(None, description='Maximum number of retired credits'), session: Session = Depends(get_session), ): """Get aggregated project registration data""" @@ -362,34 +355,35 @@ def get_credits_by_issuance_date( # join Credit with Project on project_id query = session.query(Credit).join(Project, Credit.project_id == Project.project_id) - # Apply filters - filterable_attributes = [ - ('registry', registry, 'ilike'), - ('country', country, 'ilike'), - ('protocol', protocol, 'ilike'), - ('category', category, 'ilike'), + # Filters applying 'ilike' operation + ilike_filters = [ + ('registry', registry, 'ilike', Project), + ('category', category, 'ilike', Project), + ('transaction_type', transaction_type, 'ilike', Credit), ] - for attribute, values, operation in filterable_attributes: + for attribute, values, operation, model in ilike_filters: query = apply_filters( - query=query, model=Project, attribute=attribute, values=values, operation=operation + query=query, model=model, attribute=attribute, values=values, operation=operation ) - other_filters = [ - ('is_arb', is_arb, '=='), - ('registered_at', registered_at_from, '>='), - ('registered_at', registered_at_to, '<='), - ('started_at', started_at_from, '>='), - ('started_at', started_at_to, '<='), - ('issued', issued_min, '>='), - ('issued', issued_max, '<='), - ('retired', retired_min, '>='), - ('retired', retired_max, '<='), + # Filter applying '==' operation + equal_filters = [('is_arb', is_arb, '==', Project), ('vintage', vintage, '==', Credit)] + + for attribute, values, operation, model in equal_filters: + query = apply_filters( + query=query, model=model, attribute=attribute, values=values, operation=operation + ) + + # Filters applying '>=' or '<=' operations + date_filters = [ + ('transaction_date', transaction_date_from, '>=', Credit), + ('transaction_date', transaction_date_to, '<=', Credit), ] - for attribute, values, operation in other_filters: + for attribute, values, operation, model in date_filters: query = apply_filters( - query=query, model=Project, attribute=attribute, values=values, operation=operation + query=query, model=model, attribute=attribute, values=values, operation=operation ) return credits_by_issuance_date(query=query, freq=freq) From 006414b7e371156b5c812e1a7c3c3275b4e1a52d Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Fri, 11 Aug 2023 11:12:19 -0700 Subject: [PATCH 17/17] refactor credits_by_transaction_date --- carbonplan_offsets_db/routers/charts.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/carbonplan_offsets_db/routers/charts.py b/carbonplan_offsets_db/routers/charts.py index 0e20c8f..367919d 100644 --- a/carbonplan_offsets_db/routers/charts.py +++ b/carbonplan_offsets_db/routers/charts.py @@ -207,9 +207,9 @@ def get_binned_numeric_data(*, query, binning_attribute): return formatted_results -def credits_by_issuance_date(*, query, freq='Y'): - """Generate binned data based on the issuance date.""" - logger.info('📊 Generating binned data based on issuance date...') +def credits_by_transaction_date(*, query, freq='Y'): + """Generate binned data based on the transaction date.""" + logger.info('📊 Generating binned data based on transaction date...') # Extract the minimum and maximum transaction_date min_date, max_date = query.with_entities( @@ -332,8 +332,8 @@ def get_projects_by_registration_date( return get_binned_data(binning_attribute='registered_at', query=query, freq=freq) -@router.get('/credits_by_issuance_date', response_model=list[dict]) -def get_credits_by_issuance_date( +@router.get('/credits_by_transaction_date', response_model=list[dict]) +def get_credits_by_transaction_date( request: Request, freq: typing.Literal['D', 'W', 'M', 'Y'] = Query('Y', description='Frequency of bins'), registry: list[Registries] | None = Query(None, description='Registry name'), @@ -349,8 +349,8 @@ def get_credits_by_issuance_date( | None = Query(default=None, description='Format: YYYY-MM-DD'), session: Session = Depends(get_session), ): - """Get aggregated project registration data""" - logger.info(f'Getting project registration data: {request.url}') + """Get aggregated credit transaction data""" + logger.info(f'Getting credit transaction data: {request.url}') # join Credit with Project on project_id query = session.query(Credit).join(Project, Credit.project_id == Project.project_id) @@ -386,4 +386,4 @@ def get_credits_by_issuance_date( query=query, model=model, attribute=attribute, values=values, operation=operation ) - return credits_by_issuance_date(query=query, freq=freq) + return credits_by_transaction_date(query=query, freq=freq)