# Mirror of https://github.com/Azure-Samples/graphrag-accelerator.git
# Synced 2025-07-31 21:28:03 +00:00 (88 lines, 2.0 KiB, YAML)
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

# this yaml file serves as a configuration template for the graphrag indexing jobs
# some values are hardcoded while others will be dynamically set
#
# Conventions used below:
#   - $NAME values are environment-style placeholders.
#   - PLACEHOLDER values are presumably the "dynamically set" ones mentioned
#     above -- confirm against the code that loads this template.

# Input documents: text blobs whose names end in ".txt".
input:
  type: blob
  file_type: text
  # Single-quoted so the backslash in the regex stays literal.
  file_pattern: '.*\.txt$'
  storage_account_blob_url: $STORAGE_ACCOUNT_BLOB_URL
  container_name: PLACEHOLDER
  base_dir: .

# Blob location using the "output" base directory.
storage:
  type: blob
  storage_account_blob_url: $STORAGE_ACCOUNT_BLOB_URL
  container_name: PLACEHOLDER
  base_dir: output

# Blob location using the "logs" base directory.
reporting:
  type: blob
  storage_account_blob_url: $STORAGE_ACCOUNT_BLOB_URL
  container_name: PLACEHOLDER
  base_dir: logs

# Blob location using the "cache" base directory.
cache:
  type: blob
  storage_account_blob_url: $STORAGE_ACCOUNT_BLOB_URL
  container_name: PLACEHOLDER
  base_dir: cache

# Azure OpenAI chat model settings, including its rate and concurrency limits.
llm:
  type: azure_openai_chat
  api_base: $GRAPHRAG_API_BASE
  api_version: $GRAPHRAG_API_VERSION
  model: $GRAPHRAG_LLM_MODEL
  deployment_name: $GRAPHRAG_LLM_DEPLOYMENT_NAME
  cognitive_services_endpoint: $GRAPHRAG_COGNITIVE_SERVICES_ENDPOINT
  model_supports_json: true
  tokens_per_minute: 80000
  requests_per_minute: 480
  thread_count: 50
  concurrent_requests: 25

parallelization:
  stagger: 0.25
  num_threads: 10

async_mode: threaded

# Embedding settings: an Azure OpenAI embedding model plus an Azure AI Search
# vector store.
embeddings:
  async_mode: threaded
  llm:
    type: azure_openai_embedding
    api_base: $GRAPHRAG_API_BASE
    api_version: $GRAPHRAG_API_VERSION
    batch_size: 16
    model: $GRAPHRAG_EMBEDDING_MODEL
    deployment_name: $GRAPHRAG_EMBEDDING_DEPLOYMENT_NAME
    cognitive_services_endpoint: $GRAPHRAG_COGNITIVE_SERVICES_ENDPOINT
    tokens_per_minute: 350000
    concurrent_requests: 25
    requests_per_minute: 2100
    thread_count: 50
    max_retries: 50
  parallelization:
    stagger: 0.25
    num_threads: 10
  vector_store:
    type: azure_ai_search
    collection_name: PLACEHOLDER
    title_column: name
    overwrite: true
    url: $AI_SEARCH_URL
    audience: $AI_SEARCH_AUDIENCE

# The three prompt sections below are intentionally left commented out.
# entity_extraction:
#   prompt: PLACEHOLDER

# community_reports:
#   prompt: PLACEHOLDER

# summarize_descriptions:
#   prompt: PLACEHOLDER

snapshots:
  graphml: true