# graphrag/tests/verbs/test_create_final_documents.py

# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License

from graphrag.index.workflows.v1.create_final_documents import (
    build_steps,
    workflow_name,
)

from .util import (
    compare_outputs,
    get_config_for_workflow,
    get_workflow_output,
    load_expected,
    load_input_tables,
)


async def test_create_final_documents():
    """Check the create_final_documents workflow against its expected output.

    Loads the ``workflow:create_final_text_units`` input, builds the workflow
    steps from the config for ``workflow_name``, runs them, and compares the
    result with the expected output loaded for the same workflow.
    """
    source_tables = load_input_tables(["workflow:create_final_text_units"])
    expected_output = load_expected(workflow_name)

    workflow_config = get_config_for_workflow(workflow_name)

    # The workflow definition passed to the runner only carries the steps.
    workflow_definition = {"steps": build_steps(workflow_config)}

    result = await get_workflow_output(source_tables, workflow_definition)

    compare_outputs(result, expected_output)


async def test_create_final_documents_with_attribute_columns():
    """Check the workflow output when ``document_attribute_columns`` is set.

    Runs the same workflow as the plain test, but with
    ``document_attribute_columns`` set to ``["title"]`` in the config. Only
    the columns shared with the expected output are compared; the presence of
    the ``attributes`` column and the total column count are asserted
    separately.
    """
    source_tables = load_input_tables(["workflow:create_final_text_units"])
    expected_output = load_expected(workflow_name)

    workflow_config = get_config_for_workflow(workflow_name)
    workflow_config["document_attribute_columns"] = ["title"]

    workflow_definition = {"steps": build_steps(workflow_config)}
    result = await get_workflow_output(source_tables, workflow_definition)

    # "title" should be gone from the output and "attributes" should be new.
    # The expected test dataframe has no "attributes" column, so the
    # comparison is limited to the columns both sides share; the new column
    # is checked on its own afterwards.
    shared_columns = ["id", "text_unit_ids", "raw_content"]
    compare_outputs(result, expected_output, columns=shared_columns)
    assert len(result.columns) == 4
    assert "attributes" in result.columns
Collapse create final documents (#1217) * Collapse create_final_documents * Semver 2024-09-25 15:50:46 -07:00			`# Copyright (c) 2024 Microsoft Corporation.`
			`# Licensed under the MIT License`

			`from graphrag.index.workflows.v1.create_final_documents import (`
			`build_steps,`
			`workflow_name,`
			`)`

			`from .util import (`
			`compare_outputs,`
			`get_config_for_workflow,`
			`get_workflow_output,`
			`load_expected,`
			`load_input_tables,`
			`)`


			`async def test_create_final_documents():`
			`input_tables = load_input_tables([`
Collapse graph documents workflows (#1284) * Copy base documents logic into final documents * Delete create_base_documents * Combine graph creation under create_base_entity_graph * Delete collapsed workflows * Migrate most graph internals to nx.Graph * Fix None edge case * Semver * Remove comment typo * Fix smoke tests 2024-10-15 12:58:58 -07:00			`"workflow:create_final_text_units",`
Collapse create final documents (#1217) * Collapse create_final_documents * Semver 2024-09-25 15:50:46 -07:00			`])`
			`expected = load_expected(workflow_name)`

			`config = get_config_for_workflow(workflow_name)`

Reorganize flows (#1240) * Extract base docs and entity graph * Move extracted entities and text units * Move communities and community reports * Move covariates and final documents * Move entities, nodes, relationships * Move text_units and summarized entities * Assert all snapshot null cases * Remove disabled steps util * Remove incorrect use of input "others" * Convert text_embed_df to just return the embeddings, not update the df * Convert snapshot functions to noops * Semver * Remove lingering covariates_enabled param * Name consistency * Syntax cleanup 2024-10-02 08:57:08 -07:00			`steps = build_steps(config)`
Collapse create final documents (#1217) * Collapse create_final_documents * Semver 2024-09-25 15:50:46 -07:00
			`actual = await get_workflow_output(`
			`input_tables,`
			`{`
			`"steps": steps,`
			`},`
			`)`

			`compare_outputs(actual, expected)`


Collapse graph documents workflows (#1284) * Copy base documents logic into final documents * Delete create_base_documents * Combine graph creation under create_base_entity_graph * Delete collapsed workflows * Migrate most graph internals to nx.Graph * Fix None edge case * Semver * Remove comment typo * Fix smoke tests 2024-10-15 12:58:58 -07:00			`async def test_create_final_documents_with_attribute_columns():`
			`input_tables = load_input_tables(["workflow:create_final_text_units"])`
			`expected = load_expected(workflow_name)`

			`config = get_config_for_workflow(workflow_name)`

			`config["document_attribute_columns"] = ["title"]`

			`steps = build_steps(config)`

			`actual = await get_workflow_output(`
			`input_tables,`
			`{`
			`"steps": steps,`
			`},`
			`)`

			`# we should have dropped "title" and added "attributes"`
			`# our test dataframe does not have attributes, so we'll assert without it`
			`# and separately confirm it is in the output`
			`compare_outputs(actual, expected, columns=["id", "text_unit_ids", "raw_content"])`
			`assert len(actual.columns) == 4`
			`assert "attributes" in actual.columns`