Pagination Limit?

Hello,

I am trying to pull the economics table for a very large project (~2,500 wells), and when I check the download I noticed that it stops at around 500k records. Is there a pagination or time limit on your side that could cause this? I have connection-error exceptions built into the logic, and those seem to be working as intended. However, the run finishes cleanly (a "satisfactory" message) at around 500k records, when it should be closer to 1.5 million records (2,500 wells x 600 months of data each).

Hi Eduardo. Could you provide the details of your query and the exact final response message so we can start looking into it?

Hi Derek,

Here is the code I used:

project_id = "6320bf91cb52aa00135e2387"
scenario_id = "6321ee22fa6ecf00126a1cb5"
econ_run_id = "63234ac0b6d7a400134b6b61"


project_name = "TROG PDP"
scenario_name = "DIVERSIFIED"
latest_run_date = "2022-09-15 10:54:40.657000-05:00"


monthly_export_url = create_monthly_export_url(project_id, scenario_id, econ_run_id)


monthly_export_table = get_monthly_records_large(
    monthly_export_url,
    project_name,
    project_id,
    scenario_name,
    scenario_id,
    latest_run_date,
)
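
(For context, this assumes the usual ComboCurve Python client setup, which I left out of the snippet. Roughly the following; the service-account file path is just a placeholder:)

# Assumed imports / client setup (not part of the original snippet)
import time

import pandas as pd
import requests
from sqlalchemy import create_engine
from combocurve_api_v1 import ServiceAccount, ComboCurveAuth

service_account = ServiceAccount.from_file("combocurve-service-account.json")
api_key = "XXXXX"
combocurve_auth = ComboCurveAuth(service_account, api_key)

# mlog() and myTeamsMessage are in-house logging / Teams-webhook helpers, also not shown here.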


def create_monthly_export_url(project_id, scenario_id, econ_id, date_max=""):
    # POST monthly-exports to create the export (optionally date-filtered),
    # then return the GET URL used to page through its results.
    # combocurve_auth = ComboCurveAuth(service_account, api_key)
    auth_headers = combocurve_auth.get_auth_headers()
    base_url = (
        "https://api.combocurve.com/v1/"
        f"projects/{project_id}/scenarios/{scenario_id}/econ-runs/{econ_id}/monthly-exports"
    )

    # The date filter belongs on the POST that creates the export, not on the paging URL
    post_url = base_url if date_max == "" else base_url + "?date[le]=" + date_max

    response = requests.request("POST", post_url, headers=auth_headers)
    export_id = response.json()["id"]  # id of the monthly export created by the POST

    # Page through the created export 200 records at a time
    url = base_url + f"/{export_id}?take=200"

    return url
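
(Side note: the POST response isn't checked above before reading the id. A small hedged variant that fails fast if the export isn't created would look like this:)

# Hedged sketch: defensive version of the POST step
response = requests.post(post_url, headers=auth_headers)
response.raise_for_status()          # surface 4xx/5xx instead of failing later on ["id"]
export_id = response.json()["id"]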

def get_monthly_records_large(
        url,
        project_name,
        project_id,
        scenario_name,
        scenario_id,
        latest_run_date,
        auth_headers=None,
):
    # Fetch auth headers at call time rather than in the default argument,
    # which would otherwise be evaluated only once, when the function is defined
    if auth_headers is None:
        auth_headers = combocurve_auth.get_auth_headers()

    username = "XXXXX"
    pwd = "XXXXX"
    host = "XXXX"
    db = "XXXX"
    conn_string = f"postgresql://{username}:{pwd}@{host}:5432/{db}"

    engine = create_engine(conn_string)

    tmp_list = []

    COUNTER_SPLIT = 50000

    counter = 0
    print("Pulling monthly data")

    truncate_econ_table_records("monthly_export", project_id, scenario_id, engine)
    mlog("Initial truncation of database")

    for record in get_all2(url, auth_headers):

        tmp_list.append(record)

        write_to_db_counter = counter % COUNTER_SPLIT

        if write_to_db_counter == 0 and counter > 0:
            temp_df = create_monthly_export_table(tmp_list)
            mlog("Created monthly export table")

            temp_df = append_project_info(
                temp_df,
                project_name,
                project_id,
                scenario_name,
                scenario_id,
                latest_run_date,
                True,
            )
            mlog("Append information")

            temp_df = fill_missing_cols("project_monthly_export", temp_df)

            mlog("Attempt to write to db")
            write_large_monthly_table_to_db(temp_df)
            mlog("Success writing to db")

            # Teams message
            myTeamsMessage.text(
                f"Writing partial data to database for : {project_name} - scenario: {scenario_name} - run date: {latest_run_date}"
            )
            myTeamsMessage.send()

            tmp_list = []

        if counter % COUNTER_SPLIT == 0 and counter > 0:
            print(f"Wrote {counter} monthly records to db")

        counter += 1

    # Write any remaining records that didn't fill a full batch

    temp_df = create_monthly_export_table(tmp_list)
    temp_df = append_project_info(
        temp_df,
        project_name,
        project_id,
        scenario_name,
        scenario_id,
        latest_run_date,
        True,
    )
    temp_df = fill_missing_cols("project_monthly_export", temp_df)

    mlog("Attempt to write to db")
    write_large_monthly_table_to_db(temp_df)
    mlog("Success writing to db")

    # Teams message
    myTeamsMessage.text(
        f"Finish writing data to database for : {project_name} - scenario: {scenario_name} - run date: {latest_run_date}"
    )
    myTeamsMessage.send()
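
(write_large_monthly_table_to_db, truncate_econ_table_records, and the other helpers aren't shown above. For reference, the write is essentially the pandas to_sql append pattern; a rough, hypothetical version, with signatures that don't exactly match my real helper:)

# Hypothetical sketch of the DB write helper (actual implementation not shown)
def write_large_monthly_table_to_db(df, engine, table="project_monthly_export"):
    # Append the batch to the Postgres table in chunks to keep memory bounded.
    # (The real helper presumably creates or receives its own engine.)
    df.to_sql(table, engine, if_exists="append", index=False, chunksize=10000, method="multi")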


def get_all2(url, headers):
    has_more = True

    # Keep fetching while there are more records to be returned
    while has_more:
        retries = 1
        success = False
        while not success:
            try:
                response = requests.get(url, headers=headers)
                data = response.json()

                # Yield the records so the caller does the processing.
                # Note: if the payload has no "results" key (e.g. an error body) and no
                # next-page link, nothing is yielded and the loop ends without raising.
                if "results" in data.keys():
                    yield from data["results"]

                url = get_next_page_url(response.headers)
                has_more = url is not None

                success = True
            except Exception:
                mlog("Exception getting record from CC!")

                wait = 85
                print(f"Error! Waiting {wait} secs. Retry number {retries}/3")

                time.sleep(wait)
                retries += 1
                if retries == 4:
                    mlog("Used all available retries!")
                    raise
                mlog(f"Continue to attempt {retries}/3!")
                continue

I am not getting a final exception in the log, which means the program is finishing as if everything worked, but I am missing records. If you need me to log something specific, I can add it and re-run.
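
For example, something along these lines inside the try block of get_all2, to show where the paging stops (just a sketch; raise_for_status would also surface any non-200 responses that currently slip through quietly):

# Sketch of extra per-page logging for get_all2
response = requests.get(url, headers=headers)
mlog(f"GET {url} -> HTTP {response.status_code}, Link: {response.headers.get('Link')!r}")
response.raise_for_status()  # turn 4xx/5xx into an exception so the retry logic handles it
data = response.json()
mlog(f"Page returned {len(data.get('results', []))} records")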

OK, this should help us get started. Thanks Eduardo.