Pagination Limit?

Hello,

I am trying to pull the economics table for a very large project (~2,500 wells), and when I check the download I noticed that it stops at around 500k records. Is there a pagination or time limit on your side that could cause this? I have connection-error exceptions built into the logic, and those seem to be working as intended. However, the run finishes cleanly (a "satisfactory" message) at around 500k records, when it should be closer to 1.5 million records (2,500 wells x 600 months of data each).

Hi Eduardo. Could you provide the details of your query and the exact final response message so we can start looking into it?

Hi Derek,

Here is the code I used:

project_id = "6320bf91cb52aa00135e2387"
scenario_id = "6321ee22fa6ecf00126a1cb5"
econ_run_id = "63234ac0b6d7a400134b6b61"


project_name = "TROG PDP"
scenario_name = "DIVERSIFIED"
latest_run_date = "2022-09-15 10:54:40.657000-05:00"


monthly_export_url = create_monthly_export_url(project_id, scenario_id, econ_run_id)


monthly_export_table = get_monthly_records_large(
    monthly_export_url,
    project_name,
    project_id,
    scenario_name,
    scenario_id,
    latest_run_date,
)
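
(For context, this assumes the usual ComboCurve Python client setup, which I left out of the snippet. Roughly the following; the service-account file path is just a placeholder:)

# Assumed imports / client setup (not part of the original snippet)
import time

import pandas as pd
import requests
from sqlalchemy import create_engine
from combocurve_api_v1 import ServiceAccount, ComboCurveAuth

service_account = ServiceAccount.from_file("combocurve-service-account.json")
api_key = "XXXXX"
combocurve_auth = ComboCurveAuth(service_account, api_key)

# mlog() and myTeamsMessage are in-house logging / Teams-webhook helpers, also not shown here.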


def create_monthly_export_url(project_id, scenario_id, econ_id, date_max=""):
    # POST monthly-exports to create the export (optionally date-filtered),
    # then return the GET URL used to page through its results.
    # combocurve_auth = ComboCurveAuth(service_account, api_key)
    auth_headers = combocurve_auth.get_auth_headers()
    base_url = (
        "https://api.combocurve.com/v1/"
        f"projects/{project_id}/scenarios/{scenario_id}/econ-runs/{econ_id}/monthly-exports"
    )

    # The date filter belongs on the POST that creates the export, not on the paging URL
    post_url = base_url if date_max == "" else base_url + "?date[le]=" + date_max

    response = requests.request("POST", post_url, headers=auth_headers)
    export_id = response.json()["id"]  # id of the monthly export created by the POST

    # Page through the created export 200 records at a time
    url = base_url + f"/{export_id}?take=200"

    return url
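
(Side note: the POST response isn't checked above before reading the id. A small hedged variant that fails fast if the export isn't created would look like this:)

# Hedged sketch: defensive version of the POST step
response = requests.post(post_url, headers=auth_headers)
response.raise_for_status()          # surface 4xx/5xx instead of failing later on ["id"]
export_id = response.json()["id"]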

def get_monthly_records_large(
        url,
        project_name,
        project_id,
        scenario_name,
        scenario_id,
        latest_run_date,
        auth_headers=None,
):
    # Fetch auth headers at call time rather than in the default argument,
    # which would otherwise be evaluated only once, when the function is defined
    if auth_headers is None:
        auth_headers = combocurve_auth.get_auth_headers()

    username = "XXXXX"
    pwd = "XXXXX"
    host = "XXXX"
    db = "XXXX"
    conn_string = f"postgresql://{username}:{pwd}@{host}:5432/{db}"

    engine = create_engine(conn_string)

    tmp_list = []

    COUNTER_SPLIT = 50000

    counter = 0
    print("Pulling monthly data")

    truncate_econ_table_records("monthly_export", project_id, scenario_id, engine)
    mlog("Initial truncation of database")

    for record in get_all2(url, auth_headers):

        tmp_list.append(record)

        write_to_db_counter = counter % COUNTER_SPLIT

        if write_to_db_counter == 0 and counter > 0:
            temp_df = create_monthly_export_table(tmp_list)
            mlog("Created monthly export table")

            temp_df = append_project_info(
                temp_df,
                project_name,
                project_id,
                scenario_name,
                scenario_id,
                latest_run_date,
                True,
            )
            mlog("Append information")

            temp_df = fill_missing_cols("project_monthly_export", temp_df)

            mlog("Attempt to write to db")
            write_large_monthly_table_to_db(temp_df)
            mlog("Success writing to db")

            # Teams message
            myTeamsMessage.text(
                f"Writing partial data to database for : {project_name} - scenario: {scenario_name} - run date: {latest_run_date}"
            )
            myTeamsMessage.send()

            tmp_list = []

        if counter % COUNTER_SPLIT == 0 and counter > 0:
            print(f"Wrote {counter} monthly records to db")

        counter += 1

    # Write any remaining records that didn't fill a full batch

    temp_df = create_monthly_export_table(tmp_list)
    temp_df = append_project_info(
        temp_df,
        project_name,
        project_id,
        scenario_name,
        scenario_id,
        latest_run_date,
        True,
    )
    temp_df = fill_missing_cols("project_monthly_export", temp_df)

    mlog("Attempt to write to db")
    write_large_monthly_table_to_db(temp_df)
    mlog("Success writing to db")

    # Teams message
    myTeamsMessage.text(
        f"Finish writing data to database for : {project_name} - scenario: {scenario_name} - run date: {latest_run_date}"
    )
    myTeamsMessage.send()
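
(write_large_monthly_table_to_db, truncate_econ_table_records, and the other helpers aren't shown above. For reference, the write is essentially the pandas to_sql append pattern; a rough, hypothetical version, with signatures that don't exactly match my real helper:)

# Hypothetical sketch of the DB write helper (actual implementation not shown)
def write_large_monthly_table_to_db(df, engine, table="project_monthly_export"):
    # Append the batch to the Postgres table in chunks to keep memory bounded.
    # (The real helper presumably creates or receives its own engine.)
    df.to_sql(table, engine, if_exists="append", index=False, chunksize=10000, method="multi")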


def get_all2(url, headers):
    has_more = True

    # Keep fetching while there are more records to be returned
    while has_more:
        retries = 1
        success = False
        while not success:
            try:
                response = requests.get(url, headers=headers)
                data = response.json()

                # Yield the records so the caller does the processing.
                # Note: if the payload has no "results" key (e.g. an error body) and no
                # next-page link, nothing is yielded and the loop ends without raising.
                if "results" in data.keys():
                    yield from data["results"]

                url = get_next_page_url(response.headers)
                has_more = url is not None

                success = True
            except Exception:
                mlog("Exception getting record from CC!")

                wait = 85
                print(f"Error! Waiting {wait} secs. Retry number {retries}/3")

                time.sleep(wait)
                retries += 1
                if retries == 4:
                    mlog("Used all available retries!")
                    raise
                mlog(f"Continue to attempt {retries}/3!")
                continue

I am not getting a final exception in the log, which means the program is finishing as if everything worked, but I am missing records. If you need me to log something specific, I can add it and re-run.
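
For example, something along these lines inside the try block of get_all2, to show where the paging stops (just a sketch; raise_for_status would also surface any non-200 responses that currently slip through quietly):

# Sketch of extra per-page logging for get_all2
response = requests.get(url, headers=headers)
mlog(f"GET {url} -> HTTP {response.status_code}, Link: {response.headers.get('Link')!r}")
response.raise_for_status()  # turn 4xx/5xx into an exception so the retry logic handles it
data = response.json()
mlog(f"Page returned {len(data.get('results', []))} records")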

OK, this should help us get started. Thanks Eduardo.