graphrag-accelerator/backend/src/api/pipeline-settings.yaml

88 lines
2.0 KiB
YAML
Raw Normal View History

2024-06-26 15:45:06 -04:00
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
# This YAML file serves as a configuration template for the graphrag indexing jobs.
# Some values are hardcoded, while others will be set dynamically.
# Source documents for indexing: text files read from blob storage and
# selected by file_pattern.
input:
  type: blob
  file_type: text
  # Single-quoted so the backslash and `$` in the regex stay literal.
  file_pattern: '.*\.txt$'
  storage_account_blob_url: $STORAGE_ACCOUNT_BLOB_URL
  # NOTE(review): PLACEHOLDER is presumably replaced per indexing job - confirm.
  container_name: PLACEHOLDER
  # Quoted so the lone dot is unambiguously a string.
  base_dir: '.'
# Blob storage settings for this section; base_dir is `output`.
storage:
  type: blob
  storage_account_blob_url: $STORAGE_ACCOUNT_BLOB_URL
  # NOTE(review): PLACEHOLDER is presumably replaced per indexing job - confirm.
  container_name: PLACEHOLDER
  base_dir: output
# Blob storage settings for reporting; base_dir is `logs`.
reporting:
  type: blob
  storage_account_blob_url: $STORAGE_ACCOUNT_BLOB_URL
  # NOTE(review): PLACEHOLDER is presumably replaced per indexing job - confirm.
  container_name: PLACEHOLDER
  base_dir: logs
# Blob storage settings for the cache; base_dir is `cache`.
cache:
  type: blob
  storage_account_blob_url: $STORAGE_ACCOUNT_BLOB_URL
  # NOTE(review): PLACEHOLDER is presumably replaced per indexing job - confirm.
  container_name: PLACEHOLDER
  base_dir: cache
# Root chat-model settings (Azure OpenAI chat). The $GRAPHRAG_* values are
# substitution variables (presumably environment variables - confirm).
llm:
  type: azure_openai_chat
  api_base: $GRAPHRAG_API_BASE
  api_version: $GRAPHRAG_API_VERSION
  model: $GRAPHRAG_LLM_MODEL
  deployment_name: $GRAPHRAG_LLM_DEPLOYMENT_NAME
  cognitive_services_endpoint: $GRAPHRAG_COGNITIVE_SERVICES_ENDPOINT
  # Canonical lowercase boolean; parses identically under YAML 1.1 and 1.2.
  model_supports_json: true
  # Throughput and concurrency limits for this model deployment.
  tokens_per_minute: 80000
  requests_per_minute: 480
  thread_count: 50
  concurrent_requests: 25
# Root parallelization settings; stagger and num_threads are children of this
# key.
parallelization:
  stagger: 0.25
  num_threads: 10
# Top-level async mode, a sibling of `parallelization` rather than a child.
async_mode: threaded
# Text-embedding settings. The llm, parallelization and vector_store mappings
# below are children of `embeddings`, not top-level keys, so they do not
# collide with the root-level `llm` and `parallelization` sections.
embeddings:
  async_mode: threaded
  # Embedding model settings (Azure OpenAI embedding).
  llm:
    type: azure_openai_embedding
    api_base: $GRAPHRAG_API_BASE
    api_version: $GRAPHRAG_API_VERSION
    batch_size: 16
    model: $GRAPHRAG_EMBEDDING_MODEL
    deployment_name: $GRAPHRAG_EMBEDDING_DEPLOYMENT_NAME
    cognitive_services_endpoint: $GRAPHRAG_COGNITIVE_SERVICES_ENDPOINT
    # Throughput, concurrency and retry limits for the embedding deployment.
    tokens_per_minute: 350000
    concurrent_requests: 25
    requests_per_minute: 2100
    thread_count: 50
    max_retries: 50
  parallelization:
    stagger: 0.25
    num_threads: 10
  # Vector store settings (Azure AI Search).
  vector_store:
    type: azure_ai_search
    # NOTE(review): PLACEHOLDER is presumably replaced per indexing job - confirm.
    collection_name: PLACEHOLDER
    title_column: name
    # Canonical lowercase boolean; parses identically under YAML 1.1 and 1.2.
    overwrite: true
    url: $AI_SEARCH_URL
    audience: $AI_SEARCH_AUDIENCE
# entity_extraction:
#   prompt: PLACEHOLDER
# community_reports:
#   prompt: PLACEHOLDER
# summarize_descriptions:
#   prompt: PLACEHOLDER
# Snapshot settings; graphml is switched on.
snapshots:
  # Canonical lowercase boolean; parses identically under YAML 1.1 and 1.2.
  graphml: true