mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-06-26 22:19:57 +00:00
fix docker compose issue (#238)
### What problem does this PR solve? _Briefly describe what this PR aims to solve. Include background context that will help reviewers understand the purpose of the PR._ Issue link:#[[Link the issue here](https://github.com/infiniflow/ragflow/issues/226)] ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
parent
b4abbe5d93
commit
23b448cf96
@ -65,6 +65,11 @@ def upload():
|
||||
DocumentService.query,
|
||||
name=file.filename,
|
||||
kb_id=kb.id)
|
||||
filetype = filename_type(filename)
|
||||
if not filetype:
|
||||
return get_data_error_result(
|
||||
retmsg="This type of file has not been supported yet!")
|
||||
|
||||
location = filename
|
||||
while MINIO.obj_exist(kb_id, location):
|
||||
location += "_"
|
||||
|
@ -25,7 +25,7 @@ from api.utils.api_utils import server_error_response, validate_request
|
||||
from api.utils import get_uuid, get_format_time, decrypt, download_img
|
||||
from api.db import UserTenantRole, LLMType
|
||||
from api.settings import RetCode, GITHUB_OAUTH, CHAT_MDL, EMBEDDING_MDL, ASR_MDL, IMAGE2TEXT_MDL, PARSERS, API_KEY, \
|
||||
LLM_FACTORY
|
||||
LLM_FACTORY, LLM_BASE_URL
|
||||
from api.db.services.user_service import UserService, TenantService, UserTenantService
|
||||
from api.settings import stat_logger
|
||||
from api.utils.api_utils import get_json_result, cors_reponse
|
||||
@ -220,7 +220,9 @@ def user_register(user_id, user):
|
||||
"llm_factory": LLM_FACTORY,
|
||||
"llm_name": llm.llm_name,
|
||||
"model_type": llm.model_type,
|
||||
"api_key": API_KEY})
|
||||
"api_key": API_KEY,
|
||||
"base_url": LLM_BASE_URL
|
||||
})
|
||||
|
||||
if not UserService.save(**user):
|
||||
return
|
||||
|
@ -13,6 +13,7 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
import os
|
||||
import time
|
||||
import uuid
|
||||
|
||||
@ -21,7 +22,7 @@ from api.db.db_models import init_database_tables as init_web_db
|
||||
from api.db.services import UserService
|
||||
from api.db.services.llm_service import LLMFactoriesService, LLMService, TenantLLMService, LLMBundle
|
||||
from api.db.services.user_service import TenantService, UserTenantService
|
||||
from api.settings import CHAT_MDL, EMBEDDING_MDL, ASR_MDL, IMAGE2TEXT_MDL, PARSERS, LLM_FACTORY, API_KEY
|
||||
from api.settings import CHAT_MDL, EMBEDDING_MDL, ASR_MDL, IMAGE2TEXT_MDL, PARSERS, LLM_FACTORY, API_KEY, LLM_BASE_URL
|
||||
|
||||
|
||||
def init_superuser():
|
||||
@ -53,7 +54,7 @@ def init_superuser():
|
||||
for llm in LLMService.query(fid=LLM_FACTORY):
|
||||
tenant_llm.append(
|
||||
{"tenant_id": user_info["id"], "llm_factory": LLM_FACTORY, "llm_name": llm.llm_name, "model_type": llm.model_type,
|
||||
"api_key": API_KEY})
|
||||
"api_key": API_KEY, "base_url": LLM_BASE_URL})
|
||||
|
||||
if not UserService.save(**user_info):
|
||||
print("\033[93m【ERROR】\033[0mcan't init admin.")
|
||||
@ -282,11 +283,8 @@ def init_llm_factory():
|
||||
pass
|
||||
|
||||
"""
|
||||
modify service_config
|
||||
drop table llm;
|
||||
drop table llm_factories;
|
||||
update tenant_llm set llm_factory='Tongyi-Qianwen' where llm_factory='通义千问';
|
||||
update tenant_llm set llm_factory='ZHIPU-AI' where llm_factory='智谱AI';
|
||||
update tenant set parser_ids='naive:General,qa:Q&A,resume:Resume,manual:Manual,table:Table,paper:Paper,book:Book,laws:Laws,presentation:Presentation,picture:Picture,one:One';
|
||||
alter table knowledgebase modify avatar longtext;
|
||||
alter table user modify avatar longtext;
|
||||
|
@ -91,6 +91,8 @@ default_llm = {
|
||||
}
|
||||
LLM = get_base_config("user_default_llm", {})
|
||||
LLM_FACTORY = LLM.get("factory", "Tongyi-Qianwen")
|
||||
LLM_BASE_URL = LLM.get("base_url")
|
||||
|
||||
if LLM_FACTORY not in default_llm:
|
||||
print(
|
||||
"\33[91m【ERROR】\33[0m:",
|
||||
|
@ -1,99 +1,12 @@
|
||||
version: '2.2'
|
||||
|
||||
|
||||
include:
|
||||
- path: ./docker-compose-base.yml
|
||||
env_file: ./.env
|
||||
|
||||
services:
|
||||
es01:
|
||||
container_name: ragflow-es-01
|
||||
image: docker.elastic.co/elasticsearch/elasticsearch:${STACK_VERSION}
|
||||
volumes:
|
||||
- esdata01:/usr/share/elasticsearch/data
|
||||
ports:
|
||||
- ${ES_PORT}:9200
|
||||
environment:
|
||||
- node.name=es01
|
||||
- cluster.name=${CLUSTER_NAME}
|
||||
- cluster.initial_master_nodes=es01
|
||||
- ELASTIC_PASSWORD=${ELASTIC_PASSWORD}
|
||||
- bootstrap.memory_lock=false
|
||||
- xpack.security.enabled=false
|
||||
- TZ=${TIMEZONE}
|
||||
mem_limit: ${MEM_LIMIT}
|
||||
ulimits:
|
||||
memlock:
|
||||
soft: -1
|
||||
hard: -1
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "curl http://localhost:9200"]
|
||||
interval: 10s
|
||||
timeout: 10s
|
||||
retries: 120
|
||||
networks:
|
||||
- ragflow
|
||||
restart: always
|
||||
|
||||
kibana:
|
||||
depends_on:
|
||||
es01:
|
||||
condition: service_healthy
|
||||
image: docker.elastic.co/kibana/kibana:${STACK_VERSION}
|
||||
container_name: ragflow-kibana
|
||||
volumes:
|
||||
- kibanadata:/usr/share/kibana/data
|
||||
ports:
|
||||
- ${KIBANA_PORT}:5601
|
||||
environment:
|
||||
- SERVERNAME=kibana
|
||||
- ELASTICSEARCH_HOSTS=http://es01:9200
|
||||
- TZ=${TIMEZONE}
|
||||
mem_limit: ${MEM_LIMIT}
|
||||
networks:
|
||||
- ragflow
|
||||
|
||||
mysql:
|
||||
image: mysql:5.7.18
|
||||
container_name: ragflow-mysql
|
||||
environment:
|
||||
- MYSQL_ROOT_PASSWORD=${MYSQL_PASSWORD}
|
||||
- TZ=${TIMEZONE}
|
||||
command:
|
||||
--max_connections=1000
|
||||
--character-set-server=utf8mb4
|
||||
--collation-server=utf8mb4_general_ci
|
||||
--default-authentication-plugin=mysql_native_password
|
||||
--tls_version="TLSv1.2,TLSv1.3"
|
||||
--init-file /data/application/init.sql
|
||||
ports:
|
||||
- ${MYSQL_PORT}:3306
|
||||
volumes:
|
||||
- mysql_data:/var/lib/mysql
|
||||
- ./init.sql:/data/application/init.sql
|
||||
networks:
|
||||
- ragflow
|
||||
healthcheck:
|
||||
test: ["CMD", "mysqladmin" ,"ping", "-uroot", "-p${MYSQL_PASSWORD}"]
|
||||
interval: 10s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
restart: always
|
||||
|
||||
|
||||
minio:
|
||||
image: quay.io/minio/minio:RELEASE.2023-12-20T01-00-02Z
|
||||
container_name: ragflow-minio
|
||||
command: server --console-address ":9001" /data
|
||||
ports:
|
||||
- 9000:9000
|
||||
- 9001:9001
|
||||
environment:
|
||||
- MINIO_ROOT_USER=${MINIO_USER}
|
||||
- MINIO_ROOT_PASSWORD=${MINIO_PASSWORD}
|
||||
- TZ=${TIMEZONE}
|
||||
volumes:
|
||||
- minio_data:/data
|
||||
networks:
|
||||
- ragflow
|
||||
restart: always
|
||||
|
||||
|
||||
ragflow:
|
||||
ragflow:
|
||||
depends_on:
|
||||
mysql:
|
||||
condition: service_healthy
|
||||
@ -116,18 +29,3 @@ services:
|
||||
networks:
|
||||
- ragflow
|
||||
restart: always
|
||||
|
||||
|
||||
volumes:
|
||||
esdata01:
|
||||
driver: local
|
||||
kibanadata:
|
||||
driver: local
|
||||
mysql_data:
|
||||
driver: local
|
||||
minio_data:
|
||||
driver: local
|
||||
|
||||
networks:
|
||||
ragflow:
|
||||
driver: bridge
|
||||
|
110
docker/docker-compose-base.yml
Normal file
110
docker/docker-compose-base.yml
Normal file
@ -0,0 +1,110 @@
|
||||
version: '2.2'
|
||||
|
||||
services:
|
||||
es01:
|
||||
container_name: ragflow-es-01
|
||||
image: docker.elastic.co/elasticsearch/elasticsearch:${STACK_VERSION}
|
||||
volumes:
|
||||
- esdata01:/usr/share/elasticsearch/data
|
||||
ports:
|
||||
- ${ES_PORT}:9200
|
||||
environment:
|
||||
- node.name=es01
|
||||
- cluster.name=${CLUSTER_NAME}
|
||||
- cluster.initial_master_nodes=es01
|
||||
- ELASTIC_PASSWORD=${ELASTIC_PASSWORD}
|
||||
- bootstrap.memory_lock=false
|
||||
- xpack.security.enabled=false
|
||||
- cluster.max_shards_per_node=4096
|
||||
- TZ=${TIMEZONE}
|
||||
mem_limit: ${MEM_LIMIT}
|
||||
ulimits:
|
||||
memlock:
|
||||
soft: -1
|
||||
hard: -1
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "curl http://localhost:9200"]
|
||||
interval: 10s
|
||||
timeout: 10s
|
||||
retries: 120
|
||||
networks:
|
||||
- ragflow
|
||||
restart: always
|
||||
|
||||
kibana:
|
||||
depends_on:
|
||||
es01:
|
||||
condition: service_healthy
|
||||
image: docker.elastic.co/kibana/kibana:${STACK_VERSION}
|
||||
container_name: ragflow-kibana
|
||||
volumes:
|
||||
- kibanadata:/usr/share/kibana/data
|
||||
ports:
|
||||
- ${KIBANA_PORT}:5601
|
||||
environment:
|
||||
- SERVERNAME=kibana
|
||||
- ELASTICSEARCH_HOSTS=http://es01:9200
|
||||
- TZ=${TIMEZONE}
|
||||
mem_limit: ${MEM_LIMIT}
|
||||
networks:
|
||||
- ragflow
|
||||
|
||||
mysql:
|
||||
image: mysql:5.7.18
|
||||
container_name: ragflow-mysql
|
||||
environment:
|
||||
- MYSQL_ROOT_PASSWORD=${MYSQL_PASSWORD}
|
||||
- TZ=${TIMEZONE}
|
||||
command:
|
||||
--max_connections=1000
|
||||
--character-set-server=utf8mb4
|
||||
--collation-server=utf8mb4_general_ci
|
||||
--default-authentication-plugin=mysql_native_password
|
||||
--tls_version="TLSv1.2,TLSv1.3"
|
||||
--init-file /data/application/init.sql
|
||||
ports:
|
||||
- ${MYSQL_PORT}:3306
|
||||
volumes:
|
||||
- mysql_data:/var/lib/mysql
|
||||
- ./init.sql:/data/application/init.sql
|
||||
networks:
|
||||
- ragflow
|
||||
healthcheck:
|
||||
test: ["CMD", "mysqladmin" ,"ping", "-uroot", "-p${MYSQL_PASSWORD}"]
|
||||
interval: 10s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
restart: always
|
||||
|
||||
|
||||
minio:
|
||||
image: quay.io/minio/minio:RELEASE.2023-12-20T01-00-02Z
|
||||
container_name: ragflow-minio
|
||||
command: server --console-address ":9001" /data
|
||||
ports:
|
||||
- 9000:9000
|
||||
- 9001:9001
|
||||
environment:
|
||||
- MINIO_ROOT_USER=${MINIO_USER}
|
||||
- MINIO_ROOT_PASSWORD=${MINIO_PASSWORD}
|
||||
- TZ=${TIMEZONE}
|
||||
volumes:
|
||||
- minio_data:/data
|
||||
networks:
|
||||
- ragflow
|
||||
restart: always
|
||||
|
||||
|
||||
volumes:
|
||||
esdata01:
|
||||
driver: local
|
||||
kibanadata:
|
||||
driver: local
|
||||
mysql_data:
|
||||
driver: local
|
||||
minio_data:
|
||||
driver: local
|
||||
|
||||
networks:
|
||||
ragflow:
|
||||
driver: bridge
|
@ -1,98 +1,10 @@
|
||||
version: '2.2'
|
||||
|
||||
include:
|
||||
- path: ./docker-compose-base.yml
|
||||
env_file: ./.env
|
||||
|
||||
services:
|
||||
es01:
|
||||
container_name: ragflow-es-01
|
||||
image: docker.elastic.co/elasticsearch/elasticsearch:${STACK_VERSION}
|
||||
volumes:
|
||||
- esdata01:/usr/share/elasticsearch/data
|
||||
ports:
|
||||
- ${ES_PORT}:9200
|
||||
environment:
|
||||
- node.name=es01
|
||||
- cluster.name=${CLUSTER_NAME}
|
||||
- cluster.initial_master_nodes=es01
|
||||
- ELASTIC_PASSWORD=${ELASTIC_PASSWORD}
|
||||
- bootstrap.memory_lock=false
|
||||
- xpack.security.enabled=false
|
||||
- TZ=${TIMEZONE}
|
||||
mem_limit: ${MEM_LIMIT}
|
||||
ulimits:
|
||||
memlock:
|
||||
soft: -1
|
||||
hard: -1
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "curl http://localhost:9200"]
|
||||
interval: 10s
|
||||
timeout: 10s
|
||||
retries: 120
|
||||
networks:
|
||||
- ragflow
|
||||
restart: always
|
||||
|
||||
kibana:
|
||||
depends_on:
|
||||
es01:
|
||||
condition: service_healthy
|
||||
image: docker.elastic.co/kibana/kibana:${STACK_VERSION}
|
||||
container_name: ragflow-kibana
|
||||
volumes:
|
||||
- kibanadata:/usr/share/kibana/data
|
||||
ports:
|
||||
- ${KIBANA_PORT}:5601
|
||||
environment:
|
||||
- SERVERNAME=kibana
|
||||
- ELASTICSEARCH_HOSTS=http://es01:9200
|
||||
- TZ=${TIMEZONE}
|
||||
mem_limit: ${MEM_LIMIT}
|
||||
networks:
|
||||
- ragflow
|
||||
|
||||
mysql:
|
||||
image: mysql:5.7.18
|
||||
container_name: ragflow-mysql
|
||||
environment:
|
||||
- MYSQL_ROOT_PASSWORD=${MYSQL_PASSWORD}
|
||||
- TZ=${TIMEZONE}
|
||||
command:
|
||||
--max_connections=1000
|
||||
--character-set-server=utf8mb4
|
||||
--collation-server=utf8mb4_general_ci
|
||||
--default-authentication-plugin=mysql_native_password
|
||||
--tls_version="TLSv1.2,TLSv1.3"
|
||||
--init-file /data/application/init.sql
|
||||
ports:
|
||||
- ${MYSQL_PORT}:3306
|
||||
volumes:
|
||||
- mysql_data:/var/lib/mysql
|
||||
- ./init.sql:/data/application/init.sql
|
||||
networks:
|
||||
- ragflow
|
||||
healthcheck:
|
||||
test: ["CMD", "mysqladmin" ,"ping", "-uroot", "-p${MYSQL_PASSWORD}"]
|
||||
interval: 10s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
restart: always
|
||||
|
||||
|
||||
minio:
|
||||
image: quay.io/minio/minio:RELEASE.2023-12-20T01-00-02Z
|
||||
container_name: ragflow-minio
|
||||
command: server --console-address ":9001" /data
|
||||
ports:
|
||||
- 9000:9000
|
||||
- 9001:9001
|
||||
environment:
|
||||
- MINIO_ROOT_USER=${MINIO_USER}
|
||||
- MINIO_ROOT_PASSWORD=${MINIO_PASSWORD}
|
||||
- TZ=${TIMEZONE}
|
||||
volumes:
|
||||
- minio_data:/data
|
||||
networks:
|
||||
- ragflow
|
||||
restart: always
|
||||
|
||||
|
||||
ragflow:
|
||||
depends_on:
|
||||
mysql:
|
||||
@ -107,6 +19,7 @@ services:
|
||||
- 443:443
|
||||
volumes:
|
||||
- ./service_conf.yaml:/ragflow/conf/service_conf.yaml
|
||||
- ./entrypoint.sh:/ragflow/entrypoint.sh
|
||||
- ./ragflow-logs:/ragflow/logs
|
||||
- ./nginx/ragflow.conf:/etc/nginx/conf.d/ragflow.conf
|
||||
- ./nginx/proxy.conf:/etc/nginx/proxy.conf
|
||||
@ -116,18 +29,3 @@ services:
|
||||
networks:
|
||||
- ragflow
|
||||
restart: always
|
||||
|
||||
|
||||
volumes:
|
||||
esdata01:
|
||||
driver: local
|
||||
kibanadata:
|
||||
driver: local
|
||||
mysql_data:
|
||||
driver: local
|
||||
minio_data:
|
||||
driver: local
|
||||
|
||||
networks:
|
||||
ragflow:
|
||||
driver: bridge
|
||||
|
@ -23,7 +23,7 @@ function watch_broker(){
|
||||
}
|
||||
|
||||
function task_bro(){
|
||||
sleep 60;
|
||||
sleep 160;
|
||||
watch_broker;
|
||||
}
|
||||
|
||||
|
@ -18,6 +18,7 @@ es:
|
||||
user_default_llm:
|
||||
factory: 'Tongyi-Qianwen'
|
||||
api_key: 'sk-xxxxxxxxxxxxx'
|
||||
base_url: ''
|
||||
oauth:
|
||||
github:
|
||||
client_id: xxxxxxxxxxxxxxxxxxxxxxxxx
|
||||
|
@ -10,14 +10,59 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
import copy
|
||||
from io import BytesIO
|
||||
from docx import Document
|
||||
import re
|
||||
from deepdoc.parser.pdf_parser import PlainParser
|
||||
from rag.app import laws
|
||||
from rag.nlp import huqie, is_english, tokenize, naive_merge, tokenize_table, add_positions, tokenize_chunks
|
||||
from deepdoc.parser import PdfParser, ExcelParser
|
||||
from deepdoc.parser import PdfParser, ExcelParser, DocxParser
|
||||
from rag.settings import cron_logger
|
||||
|
||||
class Docx(DocxParser):
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
def __clean(self, line):
|
||||
line = re.sub(r"\u3000", " ", line).strip()
|
||||
return line
|
||||
|
||||
def __call__(self, filename, binary=None, from_page=0, to_page=100000):
|
||||
self.doc = Document(
|
||||
filename) if not binary else Document(BytesIO(binary))
|
||||
pn = 0
|
||||
lines = []
|
||||
for p in self.doc.paragraphs:
|
||||
if pn > to_page:
|
||||
break
|
||||
if from_page <= pn < to_page and p.text.strip():
|
||||
lines.append(self.__clean(p.text))
|
||||
for run in p.runs:
|
||||
if 'lastRenderedPageBreak' in run._element.xml:
|
||||
pn += 1
|
||||
continue
|
||||
if 'w:br' in run._element.xml and 'type="page"' in run._element.xml:
|
||||
pn += 1
|
||||
tbls = []
|
||||
for tb in self.doc.tables:
|
||||
html= "<table>"
|
||||
for r in tb.rows:
|
||||
html += "<tr>"
|
||||
i = 0
|
||||
while i < len(r.cells):
|
||||
span = 1
|
||||
c = r.cells[i]
|
||||
for j in range(i+1, len(r.cells)):
|
||||
if c.text == r.cells[j].text:
|
||||
span += 1
|
||||
i = j
|
||||
i += 1
|
||||
html += f"<td>{c.text}</td>" if span == 1 else f"<td colspan='{span}'>{c.text}</td>"
|
||||
html += "</tr>"
|
||||
html += "</table>"
|
||||
tbls.append(((None, html), ""))
|
||||
return [(l, "") for l in lines if l], tbls
|
||||
|
||||
|
||||
class Pdf(PdfParser):
|
||||
def __call__(self, filename, binary=None, from_page=0,
|
||||
@ -75,8 +120,8 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
|
||||
sections = []
|
||||
if re.search(r"\.docx?$", filename, re.IGNORECASE):
|
||||
callback(0.1, "Start to parse.")
|
||||
for txt in laws.Docx()(filename, binary):
|
||||
sections.append((txt, ""))
|
||||
sections, tbls = Docx()(filename, binary)
|
||||
res = tokenize_table(tbls, doc, eng)
|
||||
callback(0.8, "Finish parsing.")
|
||||
|
||||
elif re.search(r"\.pdf$", filename, re.IGNORECASE):
|
||||
|
@ -223,8 +223,8 @@ def chunk(filename, binary=None, from_page=0, to_page=10000000000,
|
||||
continue
|
||||
if not str(row[clmns[j]]):
|
||||
continue
|
||||
if pd.isna(row[clmns[j]]):
|
||||
continue
|
||||
#if pd.isna(row[clmns[j]]):
|
||||
# continue
|
||||
fld = clmns_map[j][0]
|
||||
d[fld] = row[clmns[j]] if clmn_tys[j] != "text" else huqie.qie(
|
||||
row[clmns[j]])
|
||||
|
@ -170,3 +170,4 @@ class LocalLLM(Base):
|
||||
return ans, num_tokens_from_string(ans)
|
||||
except Exception as e:
|
||||
return "**ERROR**: " + str(e), 0
|
||||
|
||||
|
@ -68,6 +68,7 @@ def bullets_category(sections):
|
||||
|
||||
def is_english(texts):
|
||||
eng = 0
|
||||
if not texts: return False
|
||||
for t in texts:
|
||||
if re.match(r"[a-zA-Z]{2,}", t.strip()):
|
||||
eng += 1
|
||||
@ -112,8 +113,8 @@ def tokenize_table(tbls, doc, eng, batch_size=10):
|
||||
d = copy.deepcopy(doc)
|
||||
tokenize(d, rows, eng)
|
||||
d["content_with_weight"] = rows
|
||||
d["image"] = img
|
||||
add_positions(d, poss)
|
||||
if img: d["image"] = img
|
||||
if poss: add_positions(d, poss)
|
||||
res.append(d)
|
||||
continue
|
||||
de = "; " if eng else "; "
|
||||
|
@ -46,7 +46,7 @@ class Dealer:
|
||||
"k": topk,
|
||||
"similarity": sim,
|
||||
"num_candidates": topk * 2,
|
||||
"query_vector": qv
|
||||
"query_vector": list(qv)
|
||||
}
|
||||
|
||||
def search(self, req, idxnm, emb_mdl=None):
|
||||
|
Loading…
x
Reference in New Issue
Block a user