From 54daf1c43df0345dc6c8f251548fc66252adb889 Mon Sep 17 00:00:00 2001
From: Anderson Banihirwe <axbanihirwe@ualr.edu>
Date: Tue, 8 Aug 2023 15:03:10 -0700
Subject: [PATCH 01/17] add charts endpoints

---
 carbonplan_offsets_db/main.py           |  3 ++-
 carbonplan_offsets_db/routers/charts.py | 35 +++++++++++++++++++++++++
 2 files changed, 37 insertions(+), 1 deletion(-)
 create mode 100644 carbonplan_offsets_db/routers/charts.py

diff --git a/carbonplan_offsets_db/main.py b/carbonplan_offsets_db/main.py
index 83b178d..58f8a91 100644
--- a/carbonplan_offsets_db/main.py
+++ b/carbonplan_offsets_db/main.py
@@ -7,7 +7,7 @@
 from .app_metadata import metadata
 from .logging import get_logger
 from .models import Credit, Project
-from .routers import credits, files, health, projects
+from .routers import charts, credits, files, health, projects
 from .settings import get_settings
 from .tasks import calculate_totals, export_table_to_csv, update_credit_stats, update_project_stats
 
@@ -33,6 +33,7 @@ def create_application() -> FastAPI:
     application.include_router(health.router, prefix='/health', tags=['health'])
     application.include_router(projects.router, prefix='/projects', tags=['projects'])
     application.include_router(credits.router, prefix='/credits', tags=['credits'])
+    application.include_router(charts.router, prefix='/charts', tags=['charts'])
     application.include_router(files.router, prefix='/files', tags=['files'])
 
     return application
diff --git a/carbonplan_offsets_db/routers/charts.py b/carbonplan_offsets_db/routers/charts.py
new file mode 100644
index 0000000..669fea2
--- /dev/null
+++ b/carbonplan_offsets_db/routers/charts.py
@@ -0,0 +1,35 @@
+from fastapi import APIRouter, Depends, Request
+from sqlmodel import Session, text
+
+from ..database import get_session
+from ..logging import get_logger
+
+router = APIRouter()
+logger = get_logger()
+
+
+@router.get('/project_registration')
+def get_project_registration(request: Request, session: Session = Depends(get_session)):
+    """Get project registration data"""
+    logger.info(f'Getting project registration data: {request.url}')
+    stmt = text(
+        """
+        SELECT
+            width_bucket(
+                extract(year FROM age(now(), registered_at)),
+                0,
+                2,
+                15
+            ) AS bin,
+            count(*)
+        FROM
+            project
+        GROUP BY
+            bin
+        ORDER BY
+            bin
+    """
+    )
+
+    result = session.execute(stmt)
+    return result.all()

From 042f87b999c3ff3adc0756878928be4ecc30b5aa Mon Sep 17 00:00:00 2001
From: Anderson Banihirwe <axbanihirwe@ualr.edu>
Date: Tue, 8 Aug 2023 17:07:40 -0700
Subject: [PATCH 02/17] add function for getting binned data

---
 carbonplan_offsets_db/routers/charts.py | 76 ++++++++++++++++++-------
 1 file changed, 55 insertions(+), 21 deletions(-)

diff --git a/carbonplan_offsets_db/routers/charts.py b/carbonplan_offsets_db/routers/charts.py
index 669fea2..0914f9e 100644
--- a/carbonplan_offsets_db/routers/charts.py
+++ b/carbonplan_offsets_db/routers/charts.py
@@ -1,35 +1,69 @@
+import datetime
+
 from fastapi import APIRouter, Depends, Request
-from sqlmodel import Session, text
+from sqlmodel import Session, and_, case, func
 
 from ..database import get_session
 from ..logging import get_logger
+from ..models import Project
 
 router = APIRouter()
 logger = get_logger()
 
 
+def get_binned_data(session, num_bins):
+    # Get the min and max date from registered_at
+    min_date, max_date = session.query(
+        func.min(Project.registered_at), func.max(Project.registered_at)
+    ).one()
+
+    # Calculate the bin width
+    bin_width = (max_date - min_date) / num_bins
+
+    # Define the binning logic using a combination of the CASE statement and basic arithmetic
+    conditions = [
+        (
+            and_(
+                Project.registered_at >= min_date + i * bin_width,
+                Project.registered_at < min_date + (i + 1) * bin_width,
+            ),
+            f'{(min_date + i*bin_width).year}-{(min_date + (i+1)*bin_width).year}',
+        )
+        for i in range(num_bins - 1)
+    ]
+    last_bin_label = (
+        f'{(min_date + (num_bins-1)*bin_width).year}-present'
+        if max_date.year == datetime.datetime.now().year
+        else f'{(min_date + (num_bins-1)*bin_width).year}-{max_date.year}'
+    )
+    conditions.append(
+        (Project.registered_at >= min_date + (num_bins - 1) * bin_width, last_bin_label)
+    )
+
+    binned_date = case(conditions, else_='other').label('bin')
+
+    # Query with the binning logic
+    binned_results = (
+        session.query(binned_date, Project.category, func.count(Project.id).label('count'))
+        .group_by(binned_date, Project.category)
+        .all()
+    )
+
+    total_projects = session.query(Project).count()
+    total_binned_counts = sum(result[2] for result in binned_results)
+
+    if total_projects != total_binned_counts:
+        raise ValueError(
+            f'Total projects ({total_projects}) does not match sum of binned counts ({total_binned_counts}).'
+        )
+
+    return binned_results
+
+
 @router.get('/project_registration')
 def get_project_registration(request: Request, session: Session = Depends(get_session)):
     """Get project registration data"""
     logger.info(f'Getting project registration data: {request.url}')
-    stmt = text(
-        """
-        SELECT
-            width_bucket(
-                extract(year FROM age(now(), registered_at)),
-                0,
-                2,
-                15
-            ) AS bin,
-            count(*)
-        FROM
-            project
-        GROUP BY
-            bin
-        ORDER BY
-            bin
-    """
-    )
 
-    result = session.execute(stmt)
-    return result.all()
+    results = get_binned_data(session, 15)
+    return results

From 46cc12f30bbc8a5e68aed923b3c6e18acb9903b2 Mon Sep 17 00:00:00 2001
From: Anderson Banihirwe <axbanihirwe@ualr.edu>
Date: Tue, 8 Aug 2023 17:12:15 -0700
Subject: [PATCH 03/17] add num_bins to query parameters

---
 carbonplan_offsets_db/routers/charts.py | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/carbonplan_offsets_db/routers/charts.py b/carbonplan_offsets_db/routers/charts.py
index 0914f9e..6a1a35c 100644
--- a/carbonplan_offsets_db/routers/charts.py
+++ b/carbonplan_offsets_db/routers/charts.py
@@ -1,6 +1,6 @@
 import datetime
 
-from fastapi import APIRouter, Depends, Request
+from fastapi import APIRouter, Depends, Query, Request
 from sqlmodel import Session, and_, case, func
 
 from ..database import get_session
@@ -11,7 +11,7 @@
 logger = get_logger()
 
 
-def get_binned_data(session, num_bins):
+def get_binned_data(*, session, num_bins):
     # Get the min and max date from registered_at
     min_date, max_date = session.query(
         func.min(Project.registered_at), func.max(Project.registered_at)
@@ -61,9 +61,12 @@ def get_binned_data(session, num_bins):
 
 
 @router.get('/project_registration')
-def get_project_registration(request: Request, session: Session = Depends(get_session)):
-    """Get project registration data"""
+def get_project_registration(
+    request: Request,
+    num_bins: int = Query(15, description='The number of bins'),
+    session: Session = Depends(get_session),
+):
+    """Get aggregated project registration data"""
     logger.info(f'Getting project registration data: {request.url}')
 
-    results = get_binned_data(session, 15)
-    return results
+    return get_binned_data(session=session, num_bins=num_bins)

From 1800b76204cf8d62c218a9fea582e0f86ec3c209 Mon Sep 17 00:00:00 2001
From: Anderson Banihirwe <axbanihirwe@ualr.edu>
Date: Tue, 8 Aug 2023 17:26:00 -0700
Subject: [PATCH 04/17] add comments

---
 carbonplan_offsets_db/routers/charts.py | 29 +++++++++++++++++++++----
 1 file changed, 25 insertions(+), 4 deletions(-)

diff --git a/carbonplan_offsets_db/routers/charts.py b/carbonplan_offsets_db/routers/charts.py
index 6a1a35c..1e2a56e 100644
--- a/carbonplan_offsets_db/routers/charts.py
+++ b/carbonplan_offsets_db/routers/charts.py
@@ -12,15 +12,32 @@
 
 
 def get_binned_data(*, session, num_bins):
-    # Get the min and max date from registered_at
+    """
+    This function bins the projects based on their registration date and groups them by category.
+
+    Parameters
+    ----------
+    session: Session
+        SQLAlchemy session for querying the database.
+    num_bins: int,
+        Number of bins to divide the registration dates into.
+
+    Returns
+    -------
+    binned_results: list
+        A list of tuples, each containing the bin label, category, and count of projects.
+    """
+
+    # Determine the earliest and latest registration dates in the database.
     min_date, max_date = session.query(
         func.min(Project.registered_at), func.max(Project.registered_at)
     ).one()
 
-    # Calculate the bin width
+    # Calculate the width of each bin by dividing the total date range by the number of bins.
     bin_width = (max_date - min_date) / num_bins
 
-    # Define the binning logic using a combination of the CASE statement and basic arithmetic
+    # Create conditions for each bin. Each condition checks if a project's registration date
+    # falls within the range defined by a bin's start and end dates. Also, assign a label for each bin.
     conditions = [
         (
             and_(
@@ -31,6 +48,8 @@ def get_binned_data(*, session, num_bins):
         )
         for i in range(num_bins - 1)
     ]
+
+    # Handle the last bin separately to account for the possibility that the max_date is the current year.
     last_bin_label = (
         f'{(min_date + (num_bins-1)*bin_width).year}-present'
         if max_date.year == datetime.datetime.now().year
@@ -40,15 +59,17 @@ def get_binned_data(*, session, num_bins):
         (Project.registered_at >= min_date + (num_bins - 1) * bin_width, last_bin_label)
     )
 
+    # Using the conditions, generate a CASE statement to assign a bin label to each project.
     binned_date = case(conditions, else_='other').label('bin')
 
-    # Query with the binning logic
+    # Execute the main query, grouping projects by bin and category, and counting the number of projects in each group.
     binned_results = (
         session.query(binned_date, Project.category, func.count(Project.id).label('count'))
         .group_by(binned_date, Project.category)
         .all()
     )
 
+    # Validate that the sum of counts from the binned results matches the total number of projects in the database.
     total_projects = session.query(Project).count()
     total_binned_counts = sum(result[2] for result in binned_results)
 

From 1282e830830c0aebd47eddacd0b789a74611771a Mon Sep 17 00:00:00 2001
From: Anderson Banihirwe <axbanihirwe@ualr.edu>
Date: Wed, 9 Aug 2023 14:05:26 -0700
Subject: [PATCH 05/17] remove the special `max_date.year` case

---
 carbonplan_offsets_db/routers/charts.py | 14 +-------------
 1 file changed, 1 insertion(+), 13 deletions(-)

diff --git a/carbonplan_offsets_db/routers/charts.py b/carbonplan_offsets_db/routers/charts.py
index 1e2a56e..6739333 100644
--- a/carbonplan_offsets_db/routers/charts.py
+++ b/carbonplan_offsets_db/routers/charts.py
@@ -1,5 +1,3 @@
-import datetime
-
 from fastapi import APIRouter, Depends, Query, Request
 from sqlmodel import Session, and_, case, func
 
@@ -46,19 +44,9 @@ def get_binned_data(*, session, num_bins):
             ),
             f'{(min_date + i*bin_width).year}-{(min_date + (i+1)*bin_width).year}',
         )
-        for i in range(num_bins - 1)
+        for i in range(num_bins)
     ]
 
-    # Handle the last bin separately to account for the possibility that the max_date is the current year.
-    last_bin_label = (
-        f'{(min_date + (num_bins-1)*bin_width).year}-present'
-        if max_date.year == datetime.datetime.now().year
-        else f'{(min_date + (num_bins-1)*bin_width).year}-{max_date.year}'
-    )
-    conditions.append(
-        (Project.registered_at >= min_date + (num_bins - 1) * bin_width, last_bin_label)
-    )
-
     # Using the conditions, generate a CASE statement to assign a bin label to each project.
     binned_date = case(conditions, else_='other').label('bin')
 

From 03c8fec16699c369a1101cb2479d4237223da7e4 Mon Sep 17 00:00:00 2001
From: Anderson Banihirwe <axbanihirwe@ualr.edu>
Date: Wed, 9 Aug 2023 14:29:37 -0700
Subject: [PATCH 06/17] return start and end instead of concatenated bin string
 per @katamartin's feedback

---
 carbonplan_offsets_db/models.py         |  7 +++++++
 carbonplan_offsets_db/routers/charts.py | 22 ++++++++++++++++------
 2 files changed, 23 insertions(+), 6 deletions(-)

diff --git a/carbonplan_offsets_db/models.py b/carbonplan_offsets_db/models.py
index e002370..9725c1d 100644
--- a/carbonplan_offsets_db/models.py
+++ b/carbonplan_offsets_db/models.py
@@ -139,3 +139,10 @@ class CreditStatsWithPagination(pydantic.BaseModel):
 class ProjectStatsWithPagination(pydantic.BaseModel):
     pagination: Pagination
     data: list[ProjectStats]
+
+
+class ProjectBinnedRegistration(pydantic.BaseModel):
+    start: int | None
+    end: int | None
+    category: str | None
+    count: int | None
diff --git a/carbonplan_offsets_db/routers/charts.py b/carbonplan_offsets_db/routers/charts.py
index 6739333..c837349 100644
--- a/carbonplan_offsets_db/routers/charts.py
+++ b/carbonplan_offsets_db/routers/charts.py
@@ -3,7 +3,7 @@
 
 from ..database import get_session
 from ..logging import get_logger
-from ..models import Project
+from ..models import Project, ProjectBinnedRegistration
 
 router = APIRouter()
 logger = get_logger()
@@ -26,6 +26,7 @@ def get_binned_data(*, session, num_bins):
         A list of tuples, each containing the bin label, category, and count of projects.
     """
 
+    logger.info('📊 Generating binned data...')
     # Determine the earliest and latest registration dates in the database.
     min_date, max_date = session.query(
         func.min(Project.registered_at), func.max(Project.registered_at)
@@ -53,23 +54,32 @@ def get_binned_data(*, session, num_bins):
     # Execute the main query, grouping projects by bin and category, and counting the number of projects in each group.
     binned_results = (
         session.query(binned_date, Project.category, func.count(Project.id).label('count'))
-        .group_by(binned_date, Project.category)
+        .group_by('bin', Project.category)
         .all()
     )
 
     # Validate that the sum of counts from the binned results matches the total number of projects in the database.
     total_projects = session.query(Project).count()
     total_binned_counts = sum(result[2] for result in binned_results)
-
     if total_projects != total_binned_counts:
+        logger.error('❌ Mismatch in total counts!')
         raise ValueError(
-            f'Total projects ({total_projects}) does not match sum of binned counts ({total_binned_counts}).'
+            f"Total projects ({total_projects}) doesn't match sum of binned counts ({total_binned_counts})."
+        )
+
+    # Reformat results to a more concise representation.
+    formatted_results = []
+    for bin_label, category, count in binned_results:
+        start, end = (
+            (int(part) for part in bin_label.split('-')) if '-' in bin_label else (None, None)
         )
+        formatted_results.append({'start': start, 'end': end, 'category': category, 'count': count})
 
-    return binned_results
+    logger.info('✅ Binned data generated successfully!')
+    return formatted_results
 
 
-@router.get('/project_registration')
+@router.get('/project_registration', response_model=list[ProjectBinnedRegistration])
 def get_project_registration(
     request: Request,
     num_bins: int = Query(15, description='The number of bins'),

From a563c5de4556b56ec3ad9304ae9361e5b65fdfff Mon Sep 17 00:00:00 2001
From: Anderson Banihirwe <axbanihirwe@ualr.edu>
Date: Wed, 9 Aug 2023 14:59:54 -0700
Subject: [PATCH 07/17] add filters

---
 carbonplan_offsets_db/routers/charts.py | 102 ++++++++++++++++++++++--
 1 file changed, 95 insertions(+), 7 deletions(-)

diff --git a/carbonplan_offsets_db/routers/charts.py b/carbonplan_offsets_db/routers/charts.py
index c837349..3c4a190 100644
--- a/carbonplan_offsets_db/routers/charts.py
+++ b/carbonplan_offsets_db/routers/charts.py
@@ -1,15 +1,19 @@
+import datetime
+
 from fastapi import APIRouter, Depends, Query, Request
-from sqlmodel import Session, and_, case, func
+from sqlmodel import Session, and_, case, func, or_
 
 from ..database import get_session
 from ..logging import get_logger
 from ..models import Project, ProjectBinnedRegistration
+from ..query_helpers import apply_filters
+from ..schemas import Registries
 
 router = APIRouter()
 logger = get_logger()
 
 
-def get_binned_data(*, session, num_bins):
+def get_binned_data(*, session, num_bins, projects=None):
     """
     This function bins the projects based on their registration date and groups them by category.
 
@@ -19,6 +23,8 @@ def get_binned_data(*, session, num_bins):
         SQLAlchemy session for querying the database.
     num_bins: int,
         Number of bins to divide the registration dates into.
+    projects: list, optional
+        List of projects to be binned. If not provided, the function will query the entire Project table.
 
     Returns
     -------
@@ -27,10 +33,21 @@ def get_binned_data(*, session, num_bins):
     """
 
     logger.info('📊 Generating binned data...')
-    # Determine the earliest and latest registration dates in the database.
-    min_date, max_date = session.query(
-        func.min(Project.registered_at), func.max(Project.registered_at)
-    ).one()
+    if projects:
+        # Extract dates from provided projects, filtering out None values
+        registration_dates = [
+            project.registered_at for project in projects if project.registered_at is not None
+        ]
+        if not registration_dates:
+            logger.error('❌ No valid registration dates found!')
+            raise ValueError('Provided projects have no valid registration dates.')
+        min_date = min(registration_dates)
+        max_date = max(registration_dates)
+    else:
+        # Determine the earliest and latest registration dates in the database.
+        min_date, max_date = session.query(
+            func.min(Project.registered_at), func.max(Project.registered_at)
+        ).one()
 
     # Calculate the width of each bin by dividing the total date range by the number of bins.
     bin_width = (max_date - min_date) / num_bins
@@ -83,9 +100,80 @@ def get_binned_data(*, session, num_bins):
 def get_project_registration(
     request: Request,
     num_bins: int = Query(15, description='The number of bins'),
+    registry: list[Registries] | None = Query(None, description='Registry name'),
+    country: list[str] | None = Query(None, description='Country name'),
+    protocol: list[str] | None = Query(None, description='Protocol name'),
+    category: list[str] | None = Query(None, description='Category name'),
+    is_arb: bool | None = Query(None, description='Whether project is an ARB project'),
+    registered_at_from: datetime.date
+    | datetime.datetime
+    | None = Query(default=None, description='Format: YYYY-MM-DD'),
+    registered_at_to: datetime.date
+    | datetime.datetime
+    | None = Query(default=None, description='Format: YYYY-MM-DD'),
+    started_at_from: datetime.date
+    | datetime.datetime
+    | None = Query(default=None, description='Format: YYYY-MM-DD'),
+    started_at_to: datetime.date
+    | datetime.datetime
+    | None = Query(default=None, description='Format: YYYY-MM-DD'),
+    issued_min: int | None = Query(None, description='Minimum number of issued credits'),
+    issued_max: int | None = Query(None, description='Maximum number of issued credits'),
+    retired_min: int | None = Query(None, description='Minimum number of retired credits'),
+    retired_max: int | None = Query(None, description='Maximum number of retired credits'),
+    search: str
+    | None = Query(
+        None,
+        description='Case insensitive search string. Currently searches on `project_id` and `name` fields only.',
+    ),
     session: Session = Depends(get_session),
 ):
     """Get aggregated project registration data"""
     logger.info(f'Getting project registration data: {request.url}')
 
-    return get_binned_data(session=session, num_bins=num_bins)
+    query = session.query(Project)
+
+    # Apply filters
+    filterable_attributes = [
+        ('registry', registry, 'ilike'),
+        ('country', country, 'ilike'),
+        ('protocol', protocol, 'ilike'),
+        ('category', category, 'ilike'),
+    ]
+
+    for attribute, values, operation in filterable_attributes:
+        query = apply_filters(
+            query=query, model=Project, attribute=attribute, values=values, operation=operation
+        )
+
+    other_filters = [
+        ('is_arb', is_arb, '=='),
+        ('registered_at', registered_at_from, '>='),
+        ('registered_at', registered_at_to, '<='),
+        ('started_at', started_at_from, '>='),
+        ('started_at', started_at_to, '<='),
+        ('issued', issued_min, '>='),
+        ('issued', issued_max, '<='),
+        ('retired', retired_min, '>='),
+        ('retired', retired_max, '<='),
+    ]
+
+    for attribute, values, operation in other_filters:
+        query = apply_filters(
+            query=query, model=Project, attribute=attribute, values=values, operation=operation
+        )
+
+    if search:
+        search_pattern = f'%{search}%'
+        query = query.filter(
+            or_(Project.project_id.ilike(search_pattern), Project.name.ilike(search_pattern))
+        )
+
+    # Fetch filtered projects for binning
+    filtered_projects = query.all()
+    # Check if the filtered projects list is empty
+    if not filtered_projects:
+        logger.warning('⚠️ No projects found matching the filtering criteria!')
+        return []
+
+    return get_binned_data(session=session, num_bins=num_bins, projects=filtered_projects)

From 5a0b33253dc8d6f4c8e9c479ac77e5fc511fd27b Mon Sep 17 00:00:00 2001
From: Anderson Banihirwe <axbanihirwe@ualr.edu>
Date: Wed, 9 Aug 2023 15:26:27 -0700
Subject: [PATCH 08/17] generalize get_binned_data

---
 carbonplan_offsets_db/routers/charts.py | 109 ++++++++++++++----------
 1 file changed, 66 insertions(+), 43 deletions(-)

diff --git a/carbonplan_offsets_db/routers/charts.py b/carbonplan_offsets_db/routers/charts.py
index 3c4a190..cf33edd 100644
--- a/carbonplan_offsets_db/routers/charts.py
+++ b/carbonplan_offsets_db/routers/charts.py
@@ -13,69 +13,88 @@
 logger = get_logger()
 
 
-def get_binned_data(*, session, num_bins, projects=None):
+def get_binned_data(*, session, num_bins, binning_attribute, projects=None):
     """
-    This function bins the projects based on their registration date and groups them by category.
+    This function bins the projects based on a specified attribute and groups them by category.
 
     Parameters
     ----------
     session: Session
         SQLAlchemy session for querying the database.
-    num_bins: int,
-        Number of bins to divide the registration dates into.
+    num_bins: int
+        Number of bins to divide the data into.
+    binning_attribute: str
+        Attribute name of the Project model to be used for binning (e.g., 'registered_at' or 'issued').
     projects: list, optional
         List of projects to be binned. If not provided, the function will query the entire Project table.
 
     Returns
     -------
     binned_results: list
-        A list of tuples, each containing the bin label, category, and count of projects.
+        A list of dictionaries, each containing the bin start, end, category, and count of projects.
     """
 
-    logger.info('📊 Generating binned data...')
+    logger.info(f'📊 Generating binned data based on {binning_attribute}...')
+
+    # Dynamically get the attribute from the Project model based on the provided binning_attribute
+    attribute = getattr(Project, binning_attribute)
+
+    # If projects are provided, extract values for the given binning_attribute. Otherwise, query the database.
     if projects:
-        # Extract dates from provided projects, filtering out None values
-        registration_dates = [
-            project.registered_at for project in projects if project.registered_at is not None
+        values = [
+            getattr(project, binning_attribute)
+            for project in projects
+            if getattr(project, binning_attribute) is not None
         ]
-        if not registration_dates:
-            logger.error('❌ No valid registration dates found!')
-            raise ValueError('Provided projects have no valid registration dates.')
-        min_date = min(registration_dates)
-        max_date = max(registration_dates)
+        if not values:
+            logger.error(f'❌ No valid values found for attribute {binning_attribute}!')
+            raise ValueError(f'Provided projects have no valid values for {binning_attribute}.')
+        min_value, max_value = min(values), max(values)
     else:
-        # Determine the earliest and latest registration dates in the database.
-        min_date, max_date = session.query(
-            func.min(Project.registered_at), func.max(Project.registered_at)
-        ).one()
-
-    # Calculate the width of each bin by dividing the total date range by the number of bins.
-    bin_width = (max_date - min_date) / num_bins
-
-    # Create conditions for each bin. Each condition checks if a project's registration date
-    # falls within the range defined by a bin's start and end dates. Also, assign a label for each bin.
-    conditions = [
-        (
-            and_(
-                Project.registered_at >= min_date + i * bin_width,
-                Project.registered_at < min_date + (i + 1) * bin_width,
-            ),
-            f'{(min_date + i*bin_width).year}-{(min_date + (i+1)*bin_width).year}',
-        )
-        for i in range(num_bins)
-    ]
+        # Get the minimum and maximum values for the attribute from the database
+        min_value, max_value = session.query(func.min(attribute), func.max(attribute)).one()
+
+    # Calculate the width for each bin
+    bin_width = (max_value - min_value) / num_bins
+
+    # Create conditions for each bin. These conditions will determine which bin a project falls into.
+    # Check if the binning attribute is a date type
+    if isinstance(min_value, datetime.date | datetime.datetime):
+        # Create conditions for each bin. These conditions will determine which bin a project falls into for date attributes.
+        conditions = [
+            (
+                and_(
+                    attribute >= min_value + datetime.timedelta(days=i * bin_width.days),
+                    attribute < min_value + datetime.timedelta(days=(i + 1) * bin_width.days),
+                ),
+                f'{(min_value + datetime.timedelta(days=i * bin_width.days)).year}-{(min_value + datetime.timedelta(days=(i + 1) * bin_width.days)).year}',
+            )
+            for i in range(num_bins)
+        ]
+    else:
+        # Create conditions for each bin. These conditions will determine which bin a project falls into for numerical attributes.
+        conditions = [
+            (
+                and_(
+                    attribute >= min_value + i * bin_width,
+                    attribute < min_value + (i + 1) * bin_width,
+                ),
+                f'{min_value + i*bin_width}-{min_value + (i+1)*bin_width}',
+            )
+            for i in range(num_bins)
+        ]
 
     # Using the conditions, generate a CASE statement to assign a bin label to each project.
-    binned_date = case(conditions, else_='other').label('bin')
+    binned_attribute = case(conditions, else_='other').label('bin')
 
-    # Execute the main query, grouping projects by bin and category, and counting the number of projects in each group.
+    # Query the database, grouping by the calculated bin and category. Count the number of projects in each group.
     binned_results = (
-        session.query(binned_date, Project.category, func.count(Project.id).label('count'))
+        session.query(binned_attribute, Project.category, func.count(Project.id).label('count'))
         .group_by('bin', Project.category)
         .all()
     )
 
-    # Validate that the sum of counts from the binned results matches the total number of projects in the database.
+    # Validate that the counts from binned results match the total number of projects.
     total_projects = session.query(Project).count()
     total_binned_counts = sum(result[2] for result in binned_results)
     if total_projects != total_binned_counts:
@@ -84,12 +103,11 @@ def get_binned_data(*, session, num_bins, projects=None):
             f"Total projects ({total_projects}) doesn't match sum of binned counts ({total_binned_counts})."
         )
 
-    # Reformat results to a more concise representation.
+    # Reformat results to be more user-friendly
     formatted_results = []
     for bin_label, category, count in binned_results:
-        start, end = (
-            (int(part) for part in bin_label.split('-')) if '-' in bin_label else (None, None)
-        )
+        logger.info(bin_label)
+        start, end = (part for part in bin_label.split('-')) if '-' in bin_label else (None, None)
         formatted_results.append({'start': start, 'end': end, 'category': category, 'count': count})
 
     logger.info('✅ Binned data generated successfully!')
@@ -176,4 +194,9 @@ def get_project_registration(
         logger.warning('⚠️ No projects found matching the filtering criteria!')
         return []
 
-    return get_binned_data(session=session, num_bins=num_bins, projects=filtered_projects)
+    return get_binned_data(
+        session=session,
+        num_bins=num_bins,
+        binning_attribute='registered_at',
+        projects=filtered_projects,
+    )

From 971b2157a274f037b90f43d4e4c686a2055911b5 Mon Sep 17 00:00:00 2001
From: Anderson Banihirwe <axbanihirwe@ualr.edu>
Date: Wed, 9 Aug 2023 16:26:05 -0700
Subject: [PATCH 09/17] add issuance_totals endpoint

---
 carbonplan_offsets_db/models.py         |   8 +-
 carbonplan_offsets_db/routers/charts.py | 128 +++++++++++++++++++++---
 2 files changed, 116 insertions(+), 20 deletions(-)

diff --git a/carbonplan_offsets_db/models.py b/carbonplan_offsets_db/models.py
index 9725c1d..deff289 100644
--- a/carbonplan_offsets_db/models.py
+++ b/carbonplan_offsets_db/models.py
@@ -141,8 +141,8 @@ class ProjectStatsWithPagination(pydantic.BaseModel):
     data: list[ProjectStats]
 
 
-class ProjectBinnedRegistration(pydantic.BaseModel):
-    start: int | None
-    end: int | None
+class ProjectBinnedData(pydantic.BaseModel):
+    start: int | float | None
+    end: int | float | None
     category: str | None
-    count: int | None
+    value: int | float | None
diff --git a/carbonplan_offsets_db/routers/charts.py b/carbonplan_offsets_db/routers/charts.py
index cf33edd..d4e074d 100644
--- a/carbonplan_offsets_db/routers/charts.py
+++ b/carbonplan_offsets_db/routers/charts.py
@@ -5,7 +5,7 @@
 
 from ..database import get_session
 from ..logging import get_logger
-from ..models import Project, ProjectBinnedRegistration
+from ..models import Project, ProjectBinnedData
 from ..query_helpers import apply_filters
 from ..schemas import Registries
 
@@ -88,33 +88,41 @@ def get_binned_data(*, session, num_bins, binning_attribute, projects=None):
     binned_attribute = case(conditions, else_='other').label('bin')
 
     # Query the database, grouping by the calculated bin and category. Count the number of projects in each group.
-    binned_results = (
-        session.query(binned_attribute, Project.category, func.count(Project.id).label('count'))
-        .group_by('bin', Project.category)
-        .all()
-    )
+    if binning_attribute == 'issued':
+        query = session.query(
+            binned_attribute, Project.category, func.sum(Project.issued).label('value')
+        )
+        total_values = session.query(func.sum(Project.issued)).scalar()
+
+    else:
+        query = session.query(
+            binned_attribute, Project.category, func.count(Project.id).label('value')
+        )
+        total_values = session.query(func.count(Project.id)).scalar()
+    binned_results = query.group_by('bin', Project.category).all()
 
     # Validate that the counts from binned results match the total number of projects.
-    total_projects = session.query(Project).count()
-    total_binned_counts = sum(result[2] for result in binned_results)
-    if total_projects != total_binned_counts:
-        logger.error('❌ Mismatch in total counts!')
+    total_binned_values = sum(result[2] for result in binned_results)
+    logger.info(f'Total values: {total_values}, Total binned values: {total_binned_values}')
+    if total_values != total_binned_values:
+        logger.error('❌ Mismatch in total values!')
         raise ValueError(
-            f"Total projects ({total_projects}) doesn't match sum of binned counts ({total_binned_counts})."
+            f"Total values ({total_values}) doesn't match sum of binned values ({total_binned_values})."
         )
 
     # Reformat results to be more user-friendly
     formatted_results = []
-    for bin_label, category, count in binned_results:
-        logger.info(bin_label)
-        start, end = (part for part in bin_label.split('-')) if '-' in bin_label else (None, None)
-        formatted_results.append({'start': start, 'end': end, 'category': category, 'count': count})
+    for bin_label, category, value in binned_results:
+        start, end = iter(bin_label.split('-')) if '-' in bin_label else (None, None)
+        if start and end:
+            start, end = int(float(start)), int(float(end))
+        formatted_results.append({'start': start, 'end': end, 'category': category, 'value': value})
 
     logger.info('✅ Binned data generated successfully!')
     return formatted_results
 
 
-@router.get('/project_registration', response_model=list[ProjectBinnedRegistration])
+@router.get('/project_registration', response_model=list[ProjectBinnedData])
 def get_project_registration(
     request: Request,
     num_bins: int = Query(15, description='The number of bins'),
@@ -200,3 +208,91 @@ def get_project_registration(
         binning_attribute='registered_at',
         projects=filtered_projects,
     )
+
+
+@router.get('/issuance_totals', response_model=list[ProjectBinnedData])
+def get_issuance_totals(
+    request: Request,
+    num_bins: int = Query(15, description='The number of bins'),
+    registry: list[Registries] | None = Query(None, description='Registry name'),
+    country: list[str] | None = Query(None, description='Country name'),
+    protocol: list[str] | None = Query(None, description='Protocol name'),
+    category: list[str] | None = Query(None, description='Category name'),
+    is_arb: bool | None = Query(None, description='Whether project is an ARB project'),
+    registered_at_from: datetime.date
+    | datetime.datetime
+    | None = Query(default=None, description='Format: YYYY-MM-DD'),
+    registered_at_to: datetime.date
+    | datetime.datetime
+    | None = Query(default=None, description='Format: YYYY-MM-DD'),
+    started_at_from: datetime.date
+    | datetime.datetime
+    | None = Query(default=None, description='Format: YYYY-MM-DD'),
+    started_at_to: datetime.date
+    | datetime.datetime
+    | None = Query(default=None, description='Format: YYYY-MM-DD'),
+    issued_min: int | None = Query(None, description='Minimum number of issued credits'),
+    issued_max: int | None = Query(None, description='Maximum number of issued credits'),
+    retired_min: int | None = Query(None, description='Minimum number of retired credits'),
+    retired_max: int | None = Query(None, description='Maximum number of retired credits'),
+    search: str
+    | None = Query(
+        None,
+        description='Case insensitive search string. Currently searches on `project_id` and `name` fields only.',
+    ),
+    session: Session = Depends(get_session),
+):
+    """Get aggregated project registration data"""
+    logger.info(f'Getting project registration data: {request.url}')
+
+    query = session.query(Project)
+
+    # Apply filters
+    filterable_attributes = [
+        ('registry', registry, 'ilike'),
+        ('country', country, 'ilike'),
+        ('protocol', protocol, 'ilike'),
+        ('category', category, 'ilike'),
+    ]
+
+    for attribute, values, operation in filterable_attributes:
+        query = apply_filters(
+            query=query, model=Project, attribute=attribute, values=values, operation=operation
+        )
+
+    other_filters = [
+        ('is_arb', is_arb, '=='),
+        ('registered_at', registered_at_from, '>='),
+        ('registered_at', registered_at_to, '<='),
+        ('started_at', started_at_from, '>='),
+        ('started_at', started_at_to, '<='),
+        ('issued', issued_min, '>='),
+        ('issued', issued_max, '<='),
+        ('retired', retired_min, '>='),
+        ('retired', retired_max, '<='),
+    ]
+
+    for attribute, values, operation in other_filters:
+        query = apply_filters(
+            query=query, model=Project, attribute=attribute, values=values, operation=operation
+        )
+
+    if search:
+        search_pattern = f'%{search}%'
+        query = query.filter(
+            or_(Project.project_id.ilike(search_pattern), Project.name.ilike(search_pattern))
+        )
+
+    # Fetch filtered projects for binning
+    filtered_projects = query.all()
+    # Check if the filtered projects list is empty
+    if not filtered_projects:
+        logger.warning('⚠️ No projects found matching the filtering criteria!')
+        return []
+
+    return get_binned_data(
+        session=session,
+        num_bins=num_bins,
+        binning_attribute='issued',
+        projects=filtered_projects,
+    )

From c3cb32c1aeba5efa47fdce777b555b8d96dec50e Mon Sep 17 00:00:00 2001
From: Anderson Banihirwe <axbanihirwe@ualr.edu>
Date: Wed, 9 Aug 2023 16:29:35 -0700
Subject: [PATCH 10/17] split ProjectBinnedData into registration and issuance-totals models

---
 carbonplan_offsets_db/models.py         | 15 +++++++++++----
 carbonplan_offsets_db/routers/charts.py |  6 +++---
 2 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/carbonplan_offsets_db/models.py b/carbonplan_offsets_db/models.py
index deff289..7060f86 100644
--- a/carbonplan_offsets_db/models.py
+++ b/carbonplan_offsets_db/models.py
@@ -141,8 +141,15 @@ class ProjectStatsWithPagination(pydantic.BaseModel):
     data: list[ProjectStats]
 
 
-class ProjectBinnedData(pydantic.BaseModel):
-    start: int | float | None
-    end: int | float | None
+class ProjectBinnedRegistration(pydantic.BaseModel):
+    start: int | None
+    end: int | None
     category: str | None
-    value: int | float | None
+    value: int | None
+
+
+class ProjectBinnedIssuanceTotals(pydantic.BaseModel):
+    start: float | None
+    end: float | None
+    category: str | None
+    value: float | None
diff --git a/carbonplan_offsets_db/routers/charts.py b/carbonplan_offsets_db/routers/charts.py
index d4e074d..12d8932 100644
--- a/carbonplan_offsets_db/routers/charts.py
+++ b/carbonplan_offsets_db/routers/charts.py
@@ -5,7 +5,7 @@
 
 from ..database import get_session
 from ..logging import get_logger
-from ..models import Project, ProjectBinnedData
+from ..models import Project, ProjectBinnedIssuanceTotals, ProjectBinnedRegistration
 from ..query_helpers import apply_filters
 from ..schemas import Registries
 
@@ -122,7 +122,7 @@ def get_binned_data(*, session, num_bins, binning_attribute, projects=None):
     return formatted_results
 
 
-@router.get('/project_registration', response_model=list[ProjectBinnedData])
+@router.get('/project_registration', response_model=list[ProjectBinnedRegistration])
 def get_project_registration(
     request: Request,
     num_bins: int = Query(15, description='The number of bins'),
@@ -210,7 +210,7 @@ def get_project_registration(
     )
 
 
-@router.get('/issuance_totals', response_model=list[ProjectBinnedData])
+@router.get('/issuance_totals', response_model=list[ProjectBinnedIssuanceTotals])
 def get_issuance_totals(
     request: Request,
     num_bins: int = Query(15, description='The number of bins'),

From 7ff1a3092de005b0aea575a10107476e5b2f1f23 Mon Sep 17 00:00:00 2001
From: Anderson Banihirwe <axbanihirwe@ualr.edu>
Date: Thu, 10 Aug 2023 16:45:08 -0700
Subject: [PATCH 11/17] replace fixed-count bins with yearly date bins built with pandas

---
 carbonplan_offsets_db/models.py         |   4 +-
 carbonplan_offsets_db/routers/charts.py | 167 ++++++++++--------------
 2 files changed, 68 insertions(+), 103 deletions(-)

diff --git a/carbonplan_offsets_db/models.py b/carbonplan_offsets_db/models.py
index 7060f86..c88238f 100644
--- a/carbonplan_offsets_db/models.py
+++ b/carbonplan_offsets_db/models.py
@@ -142,8 +142,8 @@ class ProjectStatsWithPagination(pydantic.BaseModel):
 
 
 class ProjectBinnedRegistration(pydantic.BaseModel):
-    start: int | None
-    end: int | None
+    start: datetime.date | None
+    end: datetime.date | None
     category: str | None
     value: int | None
 
diff --git a/carbonplan_offsets_db/routers/charts.py b/carbonplan_offsets_db/routers/charts.py
index 12d8932..78de4d5 100644
--- a/carbonplan_offsets_db/routers/charts.py
+++ b/carbonplan_offsets_db/routers/charts.py
@@ -1,5 +1,7 @@
 import datetime
+import typing
 
+import pandas as pd
 from fastapi import APIRouter, Depends, Query, Request
 from sqlmodel import Session, and_, case, func, or_
 
@@ -13,119 +15,91 @@
 logger = get_logger()
 
 
-def get_binned_data(*, session, num_bins, binning_attribute, projects=None):
-    """
-    This function bins the projects based on a specified attribute and groups them by category.
+def generate_date_bins(*, min_value, max_value, freq: typing.Literal['D', 'M', 'Y']):
+    # Determine the end-of-period date based on the frequency
+    if freq == 'D':  # Daily frequency
+        end_of_period = pd.Timestamp(max_value)
+    elif freq == 'M':  # Monthly frequency
+        end_of_period = (
+            pd.Timestamp(max_value).replace(day=1) + pd.DateOffset(months=1) - pd.DateOffset(days=1)
+        )
+    elif freq == 'Y':  # Yearly frequency
+        end_of_period = pd.Timestamp(max_value).replace(month=12, day=31)
+    else:
+        raise ValueError("Unsupported frequency. Use 'D', 'M', or 'Y'.")
+
+    # Generate date bins with the specified frequency
+    date_bins = pd.date_range(start=min_value, end=max_value, freq=freq)
+
+    # Ensure that the end-of-period date is included
+    if len(date_bins) == 0 or date_bins[-1] != end_of_period:
+        date_bins = date_bins.append(pd.DatetimeIndex([end_of_period]))
 
-    Parameters
-    ----------
-    session: Session
-        SQLAlchemy session for querying the database.
-    num_bins: int
-        Number of bins to divide the data into.
-    binning_attribute: str
-        Attribute name of the Project model to be used for binning (e.g., 'registered_at' or 'issued').
-    projects: list, optional
-        List of projects to be binned. If not provided, the function will query the entire Project table.
+    return date_bins
 
-    Returns
-    -------
-    binned_results: list
-        A list of dictionaries, each containing the bin start, end, category, and count of projects.
-    """
 
+def get_binned_data(*, query, binning_attribute):
     logger.info(f'📊 Generating binned data based on {binning_attribute}...')
 
     # Dynamically get the attribute from the Project model based on the provided binning_attribute
     attribute = getattr(Project, binning_attribute)
+    min_value, max_value = query.with_entities(func.min(attribute), func.max(attribute)).one()
 
-    # If projects are provided, extract values for the given binning_attribute. Otherwise, query the database.
-    if projects:
-        values = [
-            getattr(project, binning_attribute)
-            for project in projects
-            if getattr(project, binning_attribute) is not None
-        ]
-        if not values:
-            logger.error(f'❌ No valid values found for attribute {binning_attribute}!')
-            raise ValueError(f'Provided projects have no valid values for {binning_attribute}.')
-        min_value, max_value = min(values), max(values)
-    else:
-        # Get the minimum and maximum values for the attribute from the database
-        min_value, max_value = session.query(func.min(attribute), func.max(attribute)).one()
+    logger.info(f'📊 Min value: {min_value}, max value: {max_value}')
 
-    # Calculate the width for each bin
-    bin_width = (max_value - min_value) / num_bins
+    # Check if the binning attribute is a date type and create yearly bins using pandas date_range
+    if isinstance(min_value, datetime.date) or isinstance(min_value, datetime.datetime):
+        date_bins = generate_date_bins(min_value=min_value, max_value=max_value, freq='Y')
+
+        logger.info(f'📅 Binning by date with {date_bins} bins...')
 
-    # Create conditions for each bin. These conditions will determine which bin a project falls into.
-    # Check if the binning attribute is a date type
-    if isinstance(min_value, datetime.date | datetime.datetime):
-        # Create conditions for each bin. These conditions will determine which bin a project falls into for date attributes.
-        conditions = [
-            (
-                and_(
-                    attribute >= min_value + datetime.timedelta(days=i * bin_width.days),
-                    attribute < min_value + datetime.timedelta(days=(i + 1) * bin_width.days),
-                ),
-                f'{(min_value + datetime.timedelta(days=i * bin_width.days)).year}-{(min_value + datetime.timedelta(days=(i + 1) * bin_width.days)).year}',
-            )
-            for i in range(num_bins)
-        ]
-    else:
-        # Create conditions for each bin. These conditions will determine which bin a project falls into for numerical attributes.
         conditions = [
             (
                 and_(
-                    attribute >= min_value + i * bin_width,
-                    attribute < min_value + (i + 1) * bin_width,
+                    attribute >= date_bins[i],
+                    attribute < date_bins[i + 1],
                 ),
-                f'{min_value + i*bin_width}-{min_value + (i+1)*bin_width}',
+                str(date_bins[i].year),
             )
-            for i in range(num_bins)
+            for i in range(len(date_bins) - 1)
         ]
 
-    # Using the conditions, generate a CASE statement to assign a bin label to each project.
-    binned_attribute = case(conditions, else_='other').label('bin')
-
-    # Query the database, grouping by the calculated bin and category. Count the number of projects in each group.
-    if binning_attribute == 'issued':
-        query = session.query(
-            binned_attribute, Project.category, func.sum(Project.issued).label('value')
-        )
-        total_values = session.query(func.sum(Project.issued)).scalar()
-
-    else:
-        query = session.query(
-            binned_attribute, Project.category, func.count(Project.id).label('value')
+        # Check if there are any conditions
+        if conditions:
+            binned_attribute = case(conditions, else_='other').label('bin')
+        elif len(date_bins) == 1:
+            binned_attribute = func.concat(date_bins[0].year).label(
+                'bin'
+            )  # Use concat to return a string literal
+        else:
+            binned_attribute = 'other'
+
+        # Query the database, grouping by the calculated bin and category. Count the number of projects in each group.
+        query = query.with_entities(
+            binned_attribute, Project.category, func.count(Project.project_id).label('value')
         )
-        total_values = session.query(func.count(Project.id)).scalar()
-    binned_results = query.group_by('bin', Project.category).all()
-
-    # Validate that the counts from binned results match the total number of projects.
-    total_binned_values = sum(result[2] for result in binned_results)
-    logger.info(f'Total values: {total_values}, Total binned values: {total_binned_values}')
-    if total_values != total_binned_values:
-        logger.error('❌ Mismatch in total values!')
-        raise ValueError(
-            f"Total values ({total_values}) doesn't match sum of binned values ({total_binned_values})."
-        )
-
-    # Reformat results to be more user-friendly
-    formatted_results = []
-    for bin_label, category, value in binned_results:
-        start, end = iter(bin_label.split('-')) if '-' in bin_label else (None, None)
-        if start and end:
-            start, end = int(float(start)), int(float(end))
-        formatted_results.append({'start': start, 'end': end, 'category': category, 'value': value})
+        binned_results = query.group_by('bin', Project.category).all()
+        # Reformat results to be more user-friendly
+        formatted_results = []
+        for bin_label, category, value in binned_results:
+            if bin_label == 'other':
+                start, end = None, None
+            else:
+                start, end = datetime.date(int(bin_label), 1, 1), datetime.date(
+                    int(bin_label) + 1, 1, 1
+                )
+            formatted_results.append(
+                ProjectBinnedRegistration(start=start, end=end, category=category, value=value)
+            )
 
-    logger.info('✅ Binned data generated successfully!')
-    return formatted_results
+        logger.info('✅ Binned data generated successfully!')
+        return formatted_results
+    return []
 
 
-@router.get('/project_registration', response_model=list[ProjectBinnedRegistration])
-def get_project_registration(
+@router.get('/projects_by_registration_date', response_model=list[ProjectBinnedRegistration])
+def get_projects_by_registration_date(
     request: Request,
-    num_bins: int = Query(15, description='The number of bins'),
     registry: list[Registries] | None = Query(None, description='Registry name'),
     country: list[str] | None = Query(None, description='Country name'),
     protocol: list[str] | None = Query(None, description='Protocol name'),
@@ -195,18 +169,9 @@ def get_project_registration(
             or_(Project.project_id.ilike(search_pattern), Project.name.ilike(search_pattern))
         )
 
-    # Fetch filtered projects for binning
-    filtered_projects = query.all()
-    # Check if the filtered projects list is empty
-    if not filtered_projects:
-        logger.warning('⚠️ No projects found matching the filtering criteria!')
-        return []
-
     return get_binned_data(
-        session=session,
-        num_bins=num_bins,
         binning_attribute='registered_at',
-        projects=filtered_projects,
+        query=query,
     )
 
 

From a141dd71efa351faf737d96fd930dd7cca96f260 Mon Sep 17 00:00:00 2001
From: Anderson Banihirwe <axbanihirwe@ualr.edu>
Date: Thu, 10 Aug 2023 17:42:48 -0700
Subject: [PATCH 12/17] refactor date binning and add configurable bin frequency

---
 carbonplan_offsets_db/routers/charts.py | 123 ++++++++++++------------
 1 file changed, 59 insertions(+), 64 deletions(-)

diff --git a/carbonplan_offsets_db/routers/charts.py b/carbonplan_offsets_db/routers/charts.py
index 78de4d5..33a2af6 100644
--- a/carbonplan_offsets_db/routers/charts.py
+++ b/carbonplan_offsets_db/routers/charts.py
@@ -3,7 +3,7 @@
 
 import pandas as pd
 from fastapi import APIRouter, Depends, Query, Request
-from sqlmodel import Session, and_, case, func, or_
+from sqlmodel import Session, and_, case, func, or_, text
 
 from ..database import get_session
 from ..logging import get_logger
@@ -15,91 +15,89 @@
 logger = get_logger()
 
 
-def generate_date_bins(*, min_value, max_value, freq: typing.Literal['D', 'M', 'Y']):
-    # Determine the end-of-period date based on the frequency
-    if freq == 'D':  # Daily frequency
-        end_of_period = pd.Timestamp(max_value)
-    elif freq == 'M':  # Monthly frequency
+def calculate_end_date(start_date, freq):
+    """Calculate the end date based on the start date and frequency."""
+    if freq == 'D':
+        return start_date + pd.DateOffset(days=1)
+    elif freq == 'W':
+        return start_date + pd.DateOffset(weeks=1)
+    elif freq == 'M':
+        return start_date + pd.DateOffset(months=1) + pd.offsets.MonthEnd(0)
+    else:  # freq == 'Y'
+        return start_date + pd.DateOffset(years=1) + pd.offsets.YearEnd(0)
+
+
+def generate_date_bins(*, min_value, max_value, freq: typing.Literal['D', 'W', 'M', 'Y']):
+    """Generate date bins with the specified frequency."""
+    end_of_period = pd.Timestamp(max_value)
+    if freq == 'M':
         end_of_period = (
-            pd.Timestamp(max_value).replace(day=1) + pd.DateOffset(months=1) - pd.DateOffset(days=1)
+            end_of_period.replace(day=1) + pd.DateOffset(months=1) - pd.DateOffset(days=1)
         )
-    elif freq == 'Y':  # Yearly frequency
-        end_of_period = pd.Timestamp(max_value).replace(month=12, day=31)
-    else:
-        raise ValueError("Unsupported frequency. Use 'D', 'M', or 'Y'.")
+    elif freq == 'Y':
+        end_of_period = end_of_period.replace(month=12, day=31)
 
-    # Generate date bins with the specified frequency
     date_bins = pd.date_range(start=min_value, end=max_value, freq=freq)
 
-    # Ensure that the end-of-period date is included
+    # Ensure the last date is included
     if len(date_bins) == 0 or date_bins[-1] != end_of_period:
         date_bins = date_bins.append(pd.DatetimeIndex([end_of_period]))
 
+    logger.info(f'📅 Binning by date with {len(date_bins)} bins...')
     return date_bins
 
 
-def get_binned_data(*, query, binning_attribute):
+def get_binned_data(*, query, binning_attribute, freq='Y'):
+    """Generate binned data based on the given attribute and frequency."""
     logger.info(f'📊 Generating binned data based on {binning_attribute}...')
-
-    # Dynamically get the attribute from the Project model based on the provided binning_attribute
     attribute = getattr(Project, binning_attribute)
     min_value, max_value = query.with_entities(func.min(attribute), func.max(attribute)).one()
 
-    logger.info(f'📊 Min value: {min_value}, max value: {max_value}')
+    date_bins = generate_date_bins(min_value=min_value, max_value=max_value, freq=freq)
 
-    # Check if the binning attribute is a date type and create yearly bins using pandas date_range
-    if isinstance(min_value, datetime.date) or isinstance(min_value, datetime.datetime):
-        date_bins = generate_date_bins(min_value=min_value, max_value=max_value, freq='Y')
+    # Create conditions for each bin
+    conditions = [
+        (
+            and_(attribute >= date_bins[i], attribute < date_bins[i + 1]),
+            str(date_bins[i].date()),
+        )
+        for i in range(len(date_bins) - 1)
+    ]
 
-        logger.info(f'📅 Binning by date with {date_bins} bins...')
+    # Define the binned attribute
+    if conditions:
+        binned_attribute = case(conditions, else_='other').label('bin')
+    elif len(date_bins) == 1:
+        binned_attribute = func.concat(date_bins[0].date()).label(
+            'bin'
+        )  # Use concat to return a string literal
+    else:
+        binned_attribute = text('other')  # Explicitly declare the text literal
 
-        conditions = [
-            (
-                and_(
-                    attribute >= date_bins[i],
-                    attribute < date_bins[i + 1],
-                ),
-                str(date_bins[i].year),
+    # Query and format the results
+    query = query.with_entities(
+        binned_attribute, Project.category, func.count(Project.project_id).label('value')
+    )
+    binned_results = query.group_by('bin', Project.category).all()
+
+    formatted_results = []
+    for bin_label, category, value in binned_results:
+        start_date = pd.Timestamp(bin_label) if bin_label != 'other' else None
+        end_date = calculate_end_date(start_date, freq).date() if start_date else None
+        formatted_results.append(
+            ProjectBinnedRegistration(
+                start=start_date, end=end_date, category=category, value=value
             )
-            for i in range(len(date_bins) - 1)
-        ]
-
-        # Check if there are any conditions
-        if conditions:
-            binned_attribute = case(conditions, else_='other').label('bin')
-        elif len(date_bins) == 1:
-            binned_attribute = func.concat(date_bins[0].year).label(
-                'bin'
-            )  # Use concat to return a string literal
-        else:
-            binned_attribute = 'other'
-
-        # Query the database, grouping by the calculated bin and category. Count the number of projects in each group.
-        query = query.with_entities(
-            binned_attribute, Project.category, func.count(Project.project_id).label('value')
         )
-        binned_results = query.group_by('bin', Project.category).all()
-        # Reformat results to be more user-friendly
-        formatted_results = []
-        for bin_label, category, value in binned_results:
-            if bin_label == 'other':
-                start, end = None, None
-            else:
-                start, end = datetime.date(int(bin_label), 1, 1), datetime.date(
-                    int(bin_label) + 1, 1, 1
-                )
-            formatted_results.append(
-                ProjectBinnedRegistration(start=start, end=end, category=category, value=value)
-            )
 
-        logger.info('✅ Binned data generated successfully!')
-        return formatted_results
-    return []
+    logger.info('✅ Binned data generated successfully!')
+    return formatted_results
 
 
 @router.get('/projects_by_registration_date', response_model=list[ProjectBinnedRegistration])
 def get_projects_by_registration_date(
     request: Request,
+    freq: typing.Literal['D', 'W', 'M', 'Y'] = Query('Y', description='Frequency of bins'),
     registry: list[Registries] | None = Query(None, description='Registry name'),
     country: list[str] | None = Query(None, description='Country name'),
     protocol: list[str] | None = Query(None, description='Protocol name'),
@@ -169,10 +167,7 @@ def get_projects_by_registration_date(
             or_(Project.project_id.ilike(search_pattern), Project.name.ilike(search_pattern))
         )
 
-    return get_binned_data(
-        binning_attribute='registered_at',
-        query=query,
-    )
+    return get_binned_data(binning_attribute='registered_at', query=query, freq=freq)
 
 
 @router.get('/issuance_totals', response_model=list[ProjectBinnedIssuanceTotals])

From 5833f3845dd0d986a0c036d28d49d655e9664e8d Mon Sep 17 00:00:00 2001
From: Anderson Banihirwe <axbanihirwe@ualr.edu>
Date: Thu, 10 Aug 2023 18:22:31 -0700
Subject: [PATCH 13/17] fix bin conditions for empty, single-bin, and null-date cases

---
 carbonplan_offsets_db/routers/charts.py | 41 +++++++++++++++----------
 1 file changed, 24 insertions(+), 17 deletions(-)

diff --git a/carbonplan_offsets_db/routers/charts.py b/carbonplan_offsets_db/routers/charts.py
index 33a2af6..ec318aa 100644
--- a/carbonplan_offsets_db/routers/charts.py
+++ b/carbonplan_offsets_db/routers/charts.py
@@ -3,7 +3,7 @@
 
 import pandas as pd
 from fastapi import APIRouter, Depends, Query, Request
-from sqlmodel import Session, and_, case, func, or_, text
+from sqlmodel import Session, and_, case, func, or_
 
 from ..database import get_session
 from ..logging import get_logger
@@ -53,26 +53,33 @@ def get_binned_data(*, query, binning_attribute, freq='Y'):
     attribute = getattr(Project, binning_attribute)
     min_value, max_value = query.with_entities(func.min(attribute), func.max(attribute)).one()
 
+    if min_value is None or max_value is None:
+        logger.info('✅ No data to bin!')
+        return []
+
     date_bins = generate_date_bins(min_value=min_value, max_value=max_value, freq=freq)
 
-    # Create conditions for each bin
-    conditions = [
-        (
-            and_(attribute >= date_bins[i], attribute < date_bins[i + 1]),
-            str(date_bins[i].date()),
+    conditions = []
+    # Handle the case of exactly one non-null date bin
+    if len(date_bins) == 1:
+        conditions.append((attribute.isnot(None), func.concat(date_bins[0].date())))
+
+    # Handle the case of multiple non-null date bins
+    else:
+        conditions.extend(
+            [
+                (
+                    and_(attribute >= date_bins[i], attribute < date_bins[i + 1]),
+                    str(date_bins[i].date()),
+                )
+                for i in range(len(date_bins) - 1)
+            ]
         )
-        for i in range(len(date_bins) - 1)
-    ]
+    # Add condition for null registration dates
+    conditions.append((attribute.is_(None), 'null'))
 
     # Define the binned attribute
-    if conditions:
-        binned_attribute = case(conditions, else_='other').label('bin')
-    elif len(date_bins) == 1:
-        binned_attribute = func.concat(date_bins[0].date()).label(
-            'bin'
-        )  # Use concat to return a string literal
-    else:
-        binned_attribute = text('other')  # Explicitly declare the text literal
+    binned_attribute = case(conditions, else_='other').label('bin')
 
     # Query and format the results
     query = query.with_entities(
@@ -82,7 +89,7 @@ def get_binned_data(*, query, binning_attribute, freq='Y'):
 
     formatted_results = []
     for bin_label, category, value in binned_results:
-        start_date = pd.Timestamp(bin_label) if bin_label != 'other' else None
+        start_date = pd.Timestamp(bin_label) if bin_label not in ['other', 'null'] else None
         end_date = calculate_end_date(start_date, freq).date() if start_date else None
         formatted_results.append(
             ProjectBinnedRegistration(

From 9ef1a2966495964a17f8f84c298cab3aa0da1755 Mon Sep 17 00:00:00 2001
From: Anderson Banihirwe <axbanihirwe@ualr.edu>
Date: Fri, 11 Aug 2023 01:18:39 -0700
Subject: [PATCH 14/17] add credits_by_issuance_table

---
 carbonplan_offsets_db/routers/charts.py | 206 ++++++++++++++++++------
 1 file changed, 161 insertions(+), 45 deletions(-)

diff --git a/carbonplan_offsets_db/routers/charts.py b/carbonplan_offsets_db/routers/charts.py
index ec318aa..02edfb4 100644
--- a/carbonplan_offsets_db/routers/charts.py
+++ b/carbonplan_offsets_db/routers/charts.py
@@ -1,13 +1,14 @@
 import datetime
 import typing
 
+import numpy as np
 import pandas as pd
 from fastapi import APIRouter, Depends, Query, Request
-from sqlmodel import Session, and_, case, func, or_
+from sqlmodel import Session, and_, case, func
 
 from ..database import get_session
 from ..logging import get_logger
-from ..models import Project, ProjectBinnedIssuanceTotals, ProjectBinnedRegistration
+from ..models import Credit, Project, ProjectBinnedIssuanceTotals, ProjectBinnedRegistration
 from ..query_helpers import apply_filters
 from ..schemas import Registries
 
@@ -22,31 +23,73 @@ def calculate_end_date(start_date, freq):
     elif freq == 'W':
         return start_date + pd.DateOffset(weeks=1)
     elif freq == 'M':
-        return start_date + pd.DateOffset(months=1) + pd.offsets.MonthEnd(0)
+        return start_date + pd.DateOffset(months=1) - pd.DateOffset(days=1)
     else:  # freq == 'Y'
-        return start_date + pd.DateOffset(years=1) + pd.offsets.YearEnd(0)
+        return start_date + pd.DateOffset(years=1) - pd.DateOffset(days=1)
 
 
 def generate_date_bins(*, min_value, max_value, freq: typing.Literal['D', 'W', 'M', 'Y']):
     """Generate date bins with the specified frequency."""
+    start_of_period = pd.Timestamp(min_value)
     end_of_period = pd.Timestamp(max_value)
     if freq == 'M':
         end_of_period = (
             end_of_period.replace(day=1) + pd.DateOffset(months=1) - pd.DateOffset(days=1)
         )
     elif freq == 'Y':
+        start_of_period = start_of_period.replace(month=1, day=1)  # Start of the year
         end_of_period = end_of_period.replace(month=12, day=31)
 
-    date_bins = pd.date_range(start=min_value, end=max_value, freq=freq)
+    frequency_mapping = {'Y': 'AS', 'M': 'MS', 'W': 'W', 'D': 'D'}
+
+    logger.info(
+        f'📅 Binning by date with {freq} frequency, start_period: {start_of_period}, end_of_period: {end_of_period}'
+    )
+
+    date_bins = pd.date_range(
+        start=start_of_period, end=end_of_period, freq=frequency_mapping[freq]
+    )
 
     # Ensure the last date is included
     if len(date_bins) == 0 or date_bins[-1] != end_of_period:
         date_bins = date_bins.append(pd.DatetimeIndex([end_of_period]))
 
-    logger.info(f'📅 Binning by date with {len(date_bins)} bins...')
+    logger.info(f'📅 Binning by date with {len(date_bins)} bins...: {date_bins}')
     return date_bins
 
 
+def generate_dynamic_numeric_bins(*, min_value, max_value, bin_width=None):
+    """Generate numeric bins with dynamically adjusted bin width."""
+    # Check for edge cases where min and max are the same
+    if min_value == max_value:
+        return np.array([min_value])
+
+    if bin_width is None:
+        # Calculate the range and order of magnitude
+        value_range = max_value - min_value
+        order_of_magnitude = int(np.floor(np.log10(value_range)))
+
+        # Determine the bin width based on the order of magnitude
+        if order_of_magnitude < 2:
+            bin_width = 10  # Tens for very small ranges
+        elif order_of_magnitude < 3:
+            bin_width = 100  # Hundreds for small ranges
+        elif order_of_magnitude < 4:
+            bin_width = 1000  # Thousands for lower moderate ranges
+        elif order_of_magnitude < 5:
+            bin_width = 10000  # Ten thousands for upper moderate ranges
+        elif order_of_magnitude < 6:
+            bin_width = 100000  # Hundred thousands for large ranges
+        else:
+            bin_width = 1000000  # Millions for very large ranges
+
+    # Generate evenly spaced values using the determined bin width
+    numeric_bins = np.arange(min_value, max_value + bin_width, bin_width)
+
+    logger.info(f'🔢 Binning by numeric value with {len(numeric_bins)} bins, width: {bin_width}...')
+    return numeric_bins
+
+
 def get_binned_data(*, query, binning_attribute, freq='Y'):
     """Generate binned data based on the given attribute and frequency."""
     logger.info(f'📊 Generating binned data based on {binning_attribute}...')
@@ -101,6 +144,112 @@ def get_binned_data(*, query, binning_attribute, freq='Y'):
     return formatted_results
 
 
+def get_binned_numeric_data(*, query, binning_attribute):
+    """Generate binned data based on the given numeric attribute."""
+    logger.info(f'📊 Generating binned data based on {binning_attribute}...')
+    attribute = getattr(Credit, binning_attribute)
+    min_value, max_value = query.with_entities(func.min(attribute), func.max(attribute)).one()
+
+    if min_value is None or max_value is None:
+        logger.info('✅ No data to bin!')
+        return []
+
+    numeric_bins = generate_dynamic_numeric_bins(min_value=min_value, max_value=max_value)
+
+    conditions = []
+    # Handle the case of exactly one non-null bin
+    if len(numeric_bins) == 1:
+        conditions.append((attribute.isnot(None), str(int(numeric_bins[0]))))
+
+    # Handle the case of multiple non-null bins
+    else:
+        conditions.extend(
+            [
+                (
+                    and_(attribute >= int(numeric_bins[i]), attribute < int(numeric_bins[i + 1])),
+                    str(int(numeric_bins[i])),
+                )
+                for i in range(len(numeric_bins) - 1)
+            ]
+        )
+    # Add condition for null attributes
+    conditions.append((attribute.is_(None), 'null'))
+
+    # Define the binned attribute
+    binned_attribute = case(conditions, else_='other').label('bin')
+
+    # Query and format the results
+    query = query.with_entities(
+        binned_attribute, Project.category, func.sum(Credit.quantity).label('value')
+    )
+    binned_results = query.group_by('bin', Project.category).all()
+
+    formatted_results = []
+    for bin_label, category, value in binned_results:
+        start_value = float(bin_label) if bin_label not in ['other', 'null'] else None
+        end_value = start_value + 1 if start_value else None
+        formatted_results.append(
+            ProjectBinnedIssuanceTotals(
+                start=start_value, end=end_value, category=category, value=value
+            )
+        )
+
+    logger.info('✅ Binned data generated successfully!')
+    return formatted_results
+
+
+def credits_by_issuance_date(*, query, freq='Y'):
+    """Generate binned data based on the issuance date."""
+    logger.info('📊 Generating binned data based on issuance date...')
+
+    # Extract the minimum and maximum transaction_date
+    min_date, max_date = query.with_entities(
+        func.min(Credit.transaction_date), func.max(Credit.transaction_date)
+    ).one()
+
+    if min_date is None or max_date is None:
+        logger.info('✅ No data to bin!')
+        return []
+
+    # Generate date bins based on the frequency
+    date_bins = generate_date_bins(min_value=min_date, max_value=max_date, freq=freq)
+
+    # Create conditions for binning
+    conditions = [
+        (
+            and_(
+                Credit.transaction_date >= date_bins[i], Credit.transaction_date < date_bins[i + 1]
+            ),
+            str(date_bins[i].date()),
+        )
+        for i in range(len(date_bins) - 1)
+    ]
+    conditions.append((Credit.transaction_date.is_(None), 'null'))
+
+    # Define the binned attribute
+    binned_attribute = case(conditions, else_='other').label('bin')
+
+    # Query and format the results
+    query = query.with_entities(
+        binned_attribute, Project.category, func.sum(Credit.quantity).label('value')
+    ).group_by('bin', Project.category)
+
+    binned_results = query.all()
+
+    formatted_results = []
+    for bin_label, category, value in binned_results:
+        start_date = pd.Timestamp(bin_label) if bin_label not in ['other', 'null'] else None
+        end_date = calculate_end_date(start_date, freq).date() if start_date else None
+        formatted_results.append(
+            ProjectBinnedRegistration(
+                start=start_date, end=end_date, category=category, value=value
+            )
+        )
+
+    logger.info('✅ Binned data generated successfully!')
+    return formatted_results
+
+
 @router.get('/projects_by_registration_date', response_model=list[ProjectBinnedRegistration])
 def get_projects_by_registration_date(
     request: Request,
@@ -126,11 +275,6 @@ def get_projects_by_registration_date(
     issued_max: int | None = Query(None, description='Maximum number of issued credits'),
     retired_min: int | None = Query(None, description='Minimum number of retired credits'),
     retired_max: int | None = Query(None, description='Maximum number of retired credits'),
-    search: str
-    | None = Query(
-        None,
-        description='Case insensitive search string. Currently searches on `project_id` and `name` fields only.',
-    ),
     session: Session = Depends(get_session),
 ):
     """Get aggregated project registration data"""
@@ -168,19 +312,13 @@ def get_projects_by_registration_date(
             query=query, model=Project, attribute=attribute, values=values, operation=operation
         )
 
-    if search:
-        search_pattern = f'%{search}%'
-        query = query.filter(
-            or_(Project.project_id.ilike(search_pattern), Project.name.ilike(search_pattern))
-        )
-
     return get_binned_data(binning_attribute='registered_at', query=query, freq=freq)
 
 
-@router.get('/issuance_totals', response_model=list[ProjectBinnedIssuanceTotals])
-def get_issuance_totals(
+@router.get('/credits_by_issuance_date', response_model=list[dict])
+def get_credits_by_issuance_date(
     request: Request,
-    num_bins: int = Query(15, description='The number of bins'),
+    freq: typing.Literal['D', 'W', 'M', 'Y'] = Query('Y', description='Frequency of bins'),
     registry: list[Registries] | None = Query(None, description='Registry name'),
     country: list[str] | None = Query(None, description='Country name'),
     protocol: list[str] | None = Query(None, description='Protocol name'),
@@ -202,17 +340,13 @@ def get_issuance_totals(
     issued_max: int | None = Query(None, description='Maximum number of issued credits'),
     retired_min: int | None = Query(None, description='Minimum number of retired credits'),
     retired_max: int | None = Query(None, description='Maximum number of retired credits'),
-    search: str
-    | None = Query(
-        None,
-        description='Case insensitive search string. Currently searches on `project_id` and `name` fields only.',
-    ),
     session: Session = Depends(get_session),
 ):
     """Get aggregated project registration data"""
     logger.info(f'Getting project registration data: {request.url}')
 
-    query = session.query(Project)
+    # join Credit with Project on project_id
+    query = session.query(Credit).join(Project, Credit.project_id == Project.project_id)
 
     # Apply filters
     filterable_attributes = [
@@ -244,22 +378,4 @@ def get_issuance_totals(
             query=query, model=Project, attribute=attribute, values=values, operation=operation
         )
 
-    if search:
-        search_pattern = f'%{search}%'
-        query = query.filter(
-            or_(Project.project_id.ilike(search_pattern), Project.name.ilike(search_pattern))
-        )
-
-    # Fetch filtered projects for binning
-    filtered_projects = query.all()
-    # Check if the filtered projects list is empty
-    if not filtered_projects:
-        logger.warning('⚠️ No projects found matching the filtering criteria!')
-        return []
-
-    return get_binned_data(
-        session=session,
-        num_bins=num_bins,
-        binning_attribute='issued',
-        projects=filtered_projects,
-    )
+    return credits_by_issuance_date(query=query, freq=freq)

From 415f1b44e2be0c2f6be3c969827eae75faf1e484 Mon Sep 17 00:00:00 2001
From: Anderson Banihirwe <axbanihirwe@ualr.edu>
Date: Fri, 11 Aug 2023 01:36:31 -0700
Subject: [PATCH 15/17] refactor functions

---
 carbonplan_offsets_db/routers/charts.py | 70 +++++++++++++++----------
 1 file changed, 42 insertions(+), 28 deletions(-)

diff --git a/carbonplan_offsets_db/routers/charts.py b/carbonplan_offsets_db/routers/charts.py
index 02edfb4..a47bf7f 100644
--- a/carbonplan_offsets_db/routers/charts.py
+++ b/carbonplan_offsets_db/routers/charts.py
@@ -18,43 +18,57 @@
 
 def calculate_end_date(start_date, freq):
     """Calculate the end date based on the start date and frequency."""
-    if freq == 'D':
-        return start_date + pd.DateOffset(days=1)
-    elif freq == 'W':
-        return start_date + pd.DateOffset(weeks=1)
-    elif freq == 'M':
-        return start_date + pd.DateOffset(months=1) - pd.DateOffset(days=1)
-    else:  # freq == 'Y'
-        return start_date + pd.DateOffset(years=1) - pd.DateOffset(days=1)
-
-
-def generate_date_bins(*, min_value, max_value, freq: typing.Literal['D', 'W', 'M', 'Y']):
-    """Generate date bins with the specified frequency."""
-    start_of_period = pd.Timestamp(min_value)
-    end_of_period = pd.Timestamp(max_value)
-    if freq == 'M':
-        end_of_period = (
-            end_of_period.replace(day=1) + pd.DateOffset(months=1) - pd.DateOffset(days=1)
-        )
-    elif freq == 'Y':
-        start_of_period = start_of_period.replace(month=1, day=1)  # Start of the year
-        end_of_period = end_of_period.replace(month=12, day=31)
 
+    offset_mapping = {
+        'D': pd.DateOffset(days=1),
+        'W': pd.DateOffset(weeks=1),
+        'M': pd.DateOffset(months=1),
+        'Y': pd.DateOffset(years=1),
+    }
+
+    end_date = start_date + offset_mapping[freq]
+    if freq in ['M', 'Y']:
+        end_date -= pd.DateOffset(days=1)
+
+    return end_date
+
+
+def generate_date_bins(min_value, max_value, freq: typing.Literal['D', 'W', 'M', 'Y']):
+    """
+    Generate date bins with the specified frequency.
+
+    Parameters
+    ----------
+    min_value : datetime.date
+        The minimum date value.
+    max_value : datetime.date
+        The maximum date value.
+    freq : str
+        The frequency for binning. Can be 'D', 'W', 'M', or 'Y'.
+
+    Returns
+    -------
+    pd.DatetimeIndex
+        The generated date bins.
+    """
     frequency_mapping = {'Y': 'AS', 'M': 'MS', 'W': 'W', 'D': 'D'}
-
-    logger.info(
-        f'📅 Binning by date with {freq} frequency, start_period: {start_of_period}, end_of_period: {end_of_period}'
-    )
-
     date_bins = pd.date_range(
-        start=start_of_period, end=end_of_period, freq=frequency_mapping[freq]
+        start=pd.Timestamp(min_value), end=pd.Timestamp(max_value), freq=frequency_mapping[freq]
     )
 
     # Ensure the last date is included
+    if freq == 'M':
+        end_of_period = (
+            pd.Timestamp(max_value).replace(day=1) + pd.DateOffset(months=1) - pd.DateOffset(days=1)
+        )
+    elif freq == 'Y':
+        end_of_period = pd.Timestamp(max_value).replace(month=12, day=31)
+    else:
+        end_of_period = pd.Timestamp(max_value)
+
     if len(date_bins) == 0 or date_bins[-1] != end_of_period:
         date_bins = date_bins.append(pd.DatetimeIndex([end_of_period]))
 
-    logger.info(f'📅 Binning by date with {len(date_bins)} bins...: {date_bins}')
     return date_bins
 
 

From 11ec5249189d8305e392d9e8117b1a6bae4a0036 Mon Sep 17 00:00:00 2001
From: Anderson Banihirwe <axbanihirwe@ualr.edu>
Date: Fri, 11 Aug 2023 11:07:03 -0700
Subject: [PATCH 16/17] fix date bins

---
 carbonplan_offsets_db/routers/charts.py | 102 +++++++++++-------------
 1 file changed, 48 insertions(+), 54 deletions(-)

diff --git a/carbonplan_offsets_db/routers/charts.py b/carbonplan_offsets_db/routers/charts.py
index a47bf7f..0e20c8f 100644
--- a/carbonplan_offsets_db/routers/charts.py
+++ b/carbonplan_offsets_db/routers/charts.py
@@ -52,22 +52,17 @@ def generate_date_bins(min_value, max_value, freq: typing.Literal['D', 'W', 'M',
         The generated date bins.
     """
     frequency_mapping = {'Y': 'AS', 'M': 'MS', 'W': 'W', 'D': 'D'}
+    min_value, max_value = pd.Timestamp(min_value), pd.Timestamp(max_value)
     date_bins = pd.date_range(
-        start=pd.Timestamp(min_value), end=pd.Timestamp(max_value), freq=frequency_mapping[freq]
+        start=pd.Timestamp(min_value).replace(month=1, day=1),
+        end=pd.Timestamp(max_value).replace(month=12, day=31),
+        freq=frequency_mapping[freq],
+        normalize=True,
     )
 
     # Ensure the last date is included
-    if freq == 'M':
-        end_of_period = (
-            pd.Timestamp(max_value).replace(day=1) + pd.DateOffset(months=1) - pd.DateOffset(days=1)
-        )
-    elif freq == 'Y':
-        end_of_period = pd.Timestamp(max_value).replace(month=12, day=31)
-    else:
-        end_of_period = pd.Timestamp(max_value)
-
-    if len(date_bins) == 0 or date_bins[-1] != end_of_period:
-        date_bins = date_bins.append(pd.DatetimeIndex([end_of_period]))
+    if len(date_bins) == 0 or date_bins[-1] < max_value:
+        date_bins = date_bins.append(pd.DatetimeIndex([max_value.replace(month=12, day=31)]))
 
     return date_bins
 
@@ -229,15 +224,23 @@ def credits_by_issuance_date(*, query, freq='Y'):
     date_bins = generate_date_bins(min_value=min_date, max_value=max_date, freq=freq)
 
     # Create conditions for binning
-    conditions = [
-        (
-            and_(
-                Credit.transaction_date >= date_bins[i], Credit.transaction_date < date_bins[i + 1]
-            ),
-            str(date_bins[i].date()),
+    conditions = []
+    # Handle the case of exactly one non-null date bin
+    if len(date_bins) == 1:
+        conditions.append((Credit.transaction_date.isnot(None), str(date_bins[0].date())))
+    else:
+        conditions.extend(
+            [
+                (
+                    and_(
+                        Credit.transaction_date >= date_bins[i],
+                        Credit.transaction_date < date_bins[i + 1],
+                    ),
+                    str(date_bins[i].date()),
+                )
+                for i in range(len(date_bins) - 1)
+            ]
         )
-        for i in range(len(date_bins) - 1)
-    ]
     conditions.append((Credit.transaction_date.is_(None), 'null'))
 
     # Define the binned attribute
@@ -334,26 +337,16 @@ def get_credits_by_issuance_date(
     request: Request,
     freq: typing.Literal['D', 'W', 'M', 'Y'] = Query('Y', description='Frequency of bins'),
     registry: list[Registries] | None = Query(None, description='Registry name'),
-    country: list[str] | None = Query(None, description='Country name'),
-    protocol: list[str] | None = Query(None, description='Protocol name'),
     category: list[str] | None = Query(None, description='Category name'),
     is_arb: bool | None = Query(None, description='Whether project is an ARB project'),
-    registered_at_from: datetime.date
+    transaction_type: list[str] | None = Query(None, description='Transaction type'),
+    vintage: list[int] | None = Query(None, description='Vintage'),
+    transaction_date_from: datetime.date
     | datetime.datetime
     | None = Query(default=None, description='Format: YYYY-MM-DD'),
-    registered_at_to: datetime.date
+    transaction_date_to: datetime.date
     | datetime.datetime
     | None = Query(default=None, description='Format: YYYY-MM-DD'),
-    started_at_from: datetime.date
-    | datetime.datetime
-    | None = Query(default=None, description='Format: YYYY-MM-DD'),
-    started_at_to: datetime.date
-    | datetime.datetime
-    | None = Query(default=None, description='Format: YYYY-MM-DD'),
-    issued_min: int | None = Query(None, description='Minimum number of issued credits'),
-    issued_max: int | None = Query(None, description='Maximum number of issued credits'),
-    retired_min: int | None = Query(None, description='Minimum number of retired credits'),
-    retired_max: int | None = Query(None, description='Maximum number of retired credits'),
     session: Session = Depends(get_session),
 ):
     """Get aggregated project registration data"""
@@ -362,34 +355,35 @@ def get_credits_by_issuance_date(
     # join Credit with Project on project_id
     query = session.query(Credit).join(Project, Credit.project_id == Project.project_id)
 
-    # Apply filters
-    filterable_attributes = [
-        ('registry', registry, 'ilike'),
-        ('country', country, 'ilike'),
-        ('protocol', protocol, 'ilike'),
-        ('category', category, 'ilike'),
+    # Filters applying 'ilike' operation
+    ilike_filters = [
+        ('registry', registry, 'ilike', Project),
+        ('category', category, 'ilike', Project),
+        ('transaction_type', transaction_type, 'ilike', Credit),
     ]
 
-    for attribute, values, operation in filterable_attributes:
+    for attribute, values, operation, model in ilike_filters:
         query = apply_filters(
-            query=query, model=Project, attribute=attribute, values=values, operation=operation
+            query=query, model=model, attribute=attribute, values=values, operation=operation
         )
 
-    other_filters = [
-        ('is_arb', is_arb, '=='),
-        ('registered_at', registered_at_from, '>='),
-        ('registered_at', registered_at_to, '<='),
-        ('started_at', started_at_from, '>='),
-        ('started_at', started_at_to, '<='),
-        ('issued', issued_min, '>='),
-        ('issued', issued_max, '<='),
-        ('retired', retired_min, '>='),
-        ('retired', retired_max, '<='),
+    # Filter applying '==' operation
+    equal_filters = [('is_arb', is_arb, '==', Project), ('vintage', vintage, '==', Credit)]
+
+    for attribute, values, operation, model in equal_filters:
+        query = apply_filters(
+            query=query, model=model, attribute=attribute, values=values, operation=operation
+        )
+
+    # Filters applying '>=' or '<=' operations
+    date_filters = [
+        ('transaction_date', transaction_date_from, '>=', Credit),
+        ('transaction_date', transaction_date_to, '<=', Credit),
     ]
 
-    for attribute, values, operation in other_filters:
+    for attribute, values, operation, model in date_filters:
         query = apply_filters(
-            query=query, model=Project, attribute=attribute, values=values, operation=operation
+            query=query, model=model, attribute=attribute, values=values, operation=operation
         )
 
     return credits_by_issuance_date(query=query, freq=freq)

From 006414b7e371156b5c812e1a7c3c3275b4e1a52d Mon Sep 17 00:00:00 2001
From: Anderson Banihirwe <axbanihirwe@ualr.edu>
Date: Fri, 11 Aug 2023 11:12:19 -0700
Subject: [PATCH 17/17] refactor credits_by_transaction_date

---
 carbonplan_offsets_db/routers/charts.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/carbonplan_offsets_db/routers/charts.py b/carbonplan_offsets_db/routers/charts.py
index 0e20c8f..367919d 100644
--- a/carbonplan_offsets_db/routers/charts.py
+++ b/carbonplan_offsets_db/routers/charts.py
@@ -207,9 +207,9 @@ def get_binned_numeric_data(*, query, binning_attribute):
     return formatted_results
 
 
-def credits_by_issuance_date(*, query, freq='Y'):
-    """Generate binned data based on the issuance date."""
-    logger.info('📊 Generating binned data based on issuance date...')
+def credits_by_transaction_date(*, query, freq='Y'):
+    """Generate binned data based on the transaction date."""
+    logger.info('📊 Generating binned data based on transaction date...')
 
     # Extract the minimum and maximum transaction_date
     min_date, max_date = query.with_entities(
@@ -332,8 +332,8 @@ def get_projects_by_registration_date(
     return get_binned_data(binning_attribute='registered_at', query=query, freq=freq)
 
 
-@router.get('/credits_by_issuance_date', response_model=list[dict])
-def get_credits_by_issuance_date(
+@router.get('/credits_by_transaction_date', response_model=list[dict])
+def get_credits_by_transaction_date(
     request: Request,
     freq: typing.Literal['D', 'W', 'M', 'Y'] = Query('Y', description='Frequency of bins'),
     registry: list[Registries] | None = Query(None, description='Registry name'),
@@ -349,8 +349,8 @@ def get_credits_by_issuance_date(
     | None = Query(default=None, description='Format: YYYY-MM-DD'),
     session: Session = Depends(get_session),
 ):
-    """Get aggregated project registration data"""
-    logger.info(f'Getting project registration data: {request.url}')
+    """Get aggregated credit transaction data"""
+    logger.info(f'Getting credit transaction data: {request.url}')
 
     # join Credit with Project on project_id
     query = session.query(Credit).join(Project, Credit.project_id == Project.project_id)
@@ -386,4 +386,4 @@ def get_credits_by_issuance_date(
             query=query, model=model, attribute=attribute, values=values, operation=operation
         )
 
-    return credits_by_issuance_date(query=query, freq=freq)
+    return credits_by_transaction_date(query=query, freq=freq)