mirror of
https://github.com/microsoft/graphrag.git
synced 2025-06-26 23:19:58 +00:00

* Move covariate run conditional * All pipeline registration * Fix method name construction * Rename context storage -> output_storage * Rename OutputConfig as generic StorageConfig * Reuse Storage model under InputConfig * Move input storage creation out of document loading * Move document loading into workflows * Semver * Fix smoke test config for new workflows * Fix unit tests --------- Co-authored-by: Alonso Guevara <alonsog@microsoft.com>
130 lines
3.4 KiB
JSON
130 lines
3.4 KiB
JSON
{
|
|
"input_path": "./tests/fixtures/text",
|
|
"input_file_type": "text",
|
|
"workflow_config": {
|
|
"load_input_documents": {
|
|
"max_runtime": 30
|
|
},
|
|
"create_base_text_units": {
|
|
"max_runtime": 30
|
|
},
|
|
"extract_graph": {
|
|
"max_runtime": 500
|
|
},
|
|
"finalize_graph": {
|
|
"row_range": [
|
|
1,
|
|
500
|
|
],
|
|
"nan_allowed_columns": [
|
|
"x",
|
|
"y"
|
|
],
|
|
"max_runtime": 30,
|
|
"expected_artifacts": [
|
|
"entities.parquet",
|
|
"relationships.parquet"
|
|
]
|
|
},
|
|
"extract_covariates": {
|
|
"row_range": [
|
|
1,
|
|
100
|
|
],
|
|
"nan_allowed_columns": [
|
|
"type",
|
|
"description",
|
|
"object_id",
|
|
"status",
|
|
"start_date",
|
|
"end_date",
|
|
"source_text"
|
|
],
|
|
"max_runtime": 300,
|
|
"expected_artifacts": ["covariates.parquet"]
|
|
},
|
|
"create_communities": {
|
|
"row_range": [
|
|
1,
|
|
30
|
|
],
|
|
"max_runtime": 30,
|
|
"expected_artifacts": ["communities.parquet"]
|
|
},
|
|
"create_community_reports": {
|
|
"row_range": [
|
|
1,
|
|
30
|
|
],
|
|
"nan_allowed_columns": [
|
|
"title",
|
|
"summary",
|
|
"full_content",
|
|
"full_content_json",
|
|
"rank",
|
|
"rank_explanation",
|
|
"findings",
|
|
"period",
|
|
"size"
|
|
],
|
|
"max_runtime": 300,
|
|
"expected_artifacts": ["community_reports.parquet"]
|
|
},
|
|
"create_final_text_units": {
|
|
"row_range": [
|
|
1,
|
|
10
|
|
],
|
|
"nan_allowed_columns": [
|
|
"relationship_ids",
|
|
"entity_ids",
|
|
"covariate_ids"
|
|
],
|
|
"max_runtime": 30,
|
|
"expected_artifacts": ["text_units.parquet"]
|
|
},
|
|
"create_final_documents": {
|
|
"row_range": [
|
|
1,
|
|
1
|
|
],
|
|
"nan_allowed_columns": [
|
|
"metadata"
|
|
],
|
|
"max_runtime": 30,
|
|
"expected_artifacts": ["documents.parquet"]
|
|
},
|
|
"generate_text_embeddings": {
|
|
"row_range": [
|
|
1,
|
|
100
|
|
],
|
|
"max_runtime": 150,
|
|
"expected_artifacts": [
|
|
"embeddings.text_unit.text.parquet",
|
|
"embeddings.entity.description.parquet",
|
|
"embeddings.community.full_content.parquet"
|
|
]
|
|
}
|
|
},
|
|
"query_config": [
|
|
{
|
|
"query": "Who is Agent Alex Mercer and what are his goals?",
|
|
"method": "local"
|
|
},
|
|
{
|
|
"query": "What is the major conflict in this story and who are the protagonist and antagonist?",
|
|
"method": "global"
|
|
},
|
|
{
|
|
"query": "What is the main theme of the story?",
|
|
"method": "drift"
|
|
},
|
|
{
|
|
"query": "Who is Jordan Hayes?",
|
|
"method": "basic"
|
|
}
|
|
],
|
|
"slow": false
|
|
}
|