From cdea1d0a853d74f64db00fe7493d618a84f0d731 Mon Sep 17 00:00:00 2001
From: Jin Hai
Date: Sat, 1 Jun 2024 16:24:10 +0800
Subject: [PATCH] Update readme and add license (#1018)
### What problem does this PR solve?
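- Update the README files (en/ja/zh): point documentation links to ragflow.io, add a GitHub Discussions link, and drop the older April 2024 changelog entries from the ja/zh versions
- Add Apache License 2.0 headers to the deepdoc parser and vision modules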
### Type of change
- [x] Documentation Update
---------
Signed-off-by: Jin Hai
---
README.md | 3 ++-
README_ja.md | 26 ++++++++++---------
README_zh.md | 24 ++++++++++-------
deepdoc/parser/__init__.py | 13 +++++++++-
deepdoc/parser/docx_parser.py | 14 +++++++++-
deepdoc/parser/excel_parser.py | 14 +++++++++-
deepdoc/parser/pdf_parser.py | 14 +++++++++-
deepdoc/parser/ppt_parser.py | 1 +
deepdoc/parser/resume/__init__.py | 13 ++++++++++
.../parser/resume/entities/corporations.py | 13 ++++++++++
deepdoc/parser/resume/entities/degrees.py | 13 ++++++++++
deepdoc/parser/resume/entities/industries.py | 12 +++++++++
deepdoc/parser/resume/entities/regions.py | 13 ++++++++++
deepdoc/parser/resume/entities/schools.py | 14 +++++++++-
deepdoc/parser/resume/step_one.py | 14 +++++++++-
deepdoc/parser/resume/step_two.py | 14 +++++++++-
deepdoc/vision/__init__.py | 13 ++++++++++
deepdoc/vision/postprocess.py | 13 ++++++++++
18 files changed, 211 insertions(+), 30 deletions(-)
diff --git a/README.md b/README.md
index e405cfe00..d330e7000 100644
--- a/README.md
+++ b/README.md
@@ -180,7 +180,7 @@ Try our demo at [https://demo.ragflow.io](https://demo.ragflow.io).
> With default settings, you only need to enter `http://IP_OF_YOUR_MACHINE` (**sans** port number) as the default HTTP serving port `80` can be omitted when using the default configurations.
6. In [service_conf.yaml](./docker/service_conf.yaml), select the desired LLM factory in `user_default_llm` and update the `API_KEY` field with the corresponding API key.
- > See [./docs/guides/llm_api_key_setup.md](./docs/guides/llm_api_key_setup.md) for more information.
+ > See [llm_api_key_setup](https://ragflow.io/docs/dev/llm_api_key_setup) for more information.
_The show is now on!_
@@ -326,6 +326,7 @@ See the [RAGFlow Roadmap 2024](https://github.com/infiniflow/ragflow/issues/162)
- [Discord](https://discord.gg/4XxujFgUN7)
- [Twitter](https://twitter.com/infiniflowai)
+- [GitHub Discussions](https://github.com/orgs/infiniflow/discussions)
## 🙌 Contributing
diff --git a/README_ja.md b/README_ja.md
index b0ba9c452..a89a22f32 100644
--- a/README_ja.md
+++ b/README_ja.md
@@ -24,6 +24,14 @@
+
+
## 💡 RAGFlow とは?
[RAGFlow](https://ragflow.io/) は、深い文書理解に基づいたオープンソースの RAG (Retrieval-Augmented Generation) エンジンである。LLM(大規模言語モデル)を組み合わせることで、様々な複雑なフォーマットのデータから根拠のある引用に裏打ちされた、信頼できる質問応答機能を実現し、あらゆる規模のビジネスに適した RAG ワークフローを提供します。
@@ -40,15 +48,6 @@
- 2024-05-21 ストリーミング出力とテキストチャンク取得APIをサポート。
- 2024-05-15 OpenAI GPT-4oを統合しました。
- 2024-05-08 LLM DeepSeek-V2を統合しました。
-- 2024-04-26 「ファイル管理」機能を追加しました。
-- 2024-04-19 会話 API をサポートします ([詳細](./docs/references/api.md))。
-- 2024-04-16 [BCEmbedding](https://github.com/netease-youdao/BCEmbedding) から埋め込みモデル「bce-embedding-base_v1」を追加します。
-- 2024-04-16 [FastEmbed](https://github.com/qdrant/fastembed) は、軽量かつ高速な埋め込み用に設計されています。
-- 2024-04-11 ローカル LLM デプロイメント用に [Xinference](./docs/guides/deploy_local_llm.md) をサポートします。
-- 2024-04-10 メソッド「Laws」に新しいレイアウト認識モデルを追加します。
-- 2024-04-08 [Ollama](./docs/guides/deploy_local_llm.md) を使用した大規模モデルのローカライズされたデプロイメントをサポートします。
-- 2024-04-07 中国語インターフェースをサポートします。
-
## 🌟 主な特徴
@@ -162,7 +161,7 @@
> デフォルトの設定を使用する場合、デフォルトの HTTP サービングポート `80` は省略できるので、与えられたシナリオでは、`http://IP_OF_YOUR_MACHINE`(ポート番号は省略)だけを入力すればよい。
6. [service_conf.yaml](./docker/service_conf.yaml) で、`user_default_llm` で希望の LLM ファクトリを選択し、`API_KEY` フィールドを対応する API キーで更新する。
- > 詳しくは [./docs/guides/llm_api_key_setup.md](./docs/guides/llm_api_key_setup.md) を参照してください。
+ > 詳しくは [llm_api_key_setup](https://ragflow.io/docs/dev/llm_api_key_setup) を参照してください。
_これで初期設定完了!ショーの開幕です!_
@@ -261,8 +260,10 @@ $ bash ./entrypoint.sh
## 📚 ドキュメンテーション
-- [Quickstart](./docs/quickstart.md)
-- [FAQ](./docs/references/faq.md)
+- [Quickstart](https://ragflow.io/docs/dev/)
+- [User guide](https://ragflow.io/docs/dev/category/user-guides)
+- [Reference](https://ragflow.io/docs/dev/category/references)
+- [FAQ](https://ragflow.io/docs/dev/faq)
## 📜 ロードマップ
@@ -272,6 +273,7 @@ $ bash ./entrypoint.sh
- [Discord](https://discord.gg/4XxujFgUN7)
- [Twitter](https://twitter.com/infiniflowai)
+- [GitHub Discussions](https://github.com/orgs/infiniflow/discussions)
## 🙌 コントリビュート
diff --git a/README_zh.md b/README_zh.md
index 51091c608..cbf58142b 100644
--- a/README_zh.md
+++ b/README_zh.md
@@ -23,6 +23,14 @@
+
+
## 💡 RAGFlow 是什么?
[RAGFlow](https://ragflow.io/) 是一款基于深度文档理解构建的开源 RAG(Retrieval-Augmented Generation)引擎。RAGFlow 可以为各种规模的企业及个人提供一套精简的 RAG 工作流程,结合大语言模型(LLM)针对用户各类不同的复杂格式数据提供可靠的问答以及有理有据的引用。
@@ -39,13 +47,6 @@
- 2024-05-21 支持流式结果输出和文本块获取API。
- 2024-05-15 集成大模型 OpenAI GPT-4o。
- 2024-05-08 集成大模型 DeepSeek。
-- 2024-04-26 增添了'文件管理'功能。
-- 2024-04-19 支持对话 API ([更多](./docs/references/api.md))。
-- 2024-04-16 集成嵌入模型 [BCEmbedding](https://github.com/netease-youdao/BCEmbedding) 和 专为轻型和高速嵌入而设计的 [FastEmbed](https://github.com/qdrant/fastembed)。
-- 2024-04-11 支持用 [Xinference](./docs/guides/deploy_local_llm.md) 本地化部署大模型。
-- 2024-04-10 为‘Laws’版面分析增加了底层模型。
-- 2024-04-08 支持用 [Ollama](./docs/guides/deploy_local_llm.md) 本地化部署大模型。
-- 2024-04-07 支持中文界面。
## 🌟 主要功能
@@ -159,7 +160,7 @@
> 上面这个例子中,您只需输入 http://IP_OF_YOUR_MACHINE 即可:未改动过配置则无需输入端口(默认的 HTTP 服务端口 80)。
6. 在 [service_conf.yaml](./docker/service_conf.yaml) 文件的 `user_default_llm` 栏配置 LLM factory,并在 `API_KEY` 栏填写和你选择的大模型相对应的 API key。
- > 详见 [./docs/guides/llm_api_key_setup.md](./docs/guides/llm_api_key_setup.md)。
+ > 详见 [llm_api_key_setup](https://ragflow.io/docs/dev/llm_api_key_setup)。
_好戏开始,接着奏乐接着舞!_
@@ -279,8 +280,10 @@ $ systemctl start nginx
```
## 📚 技术文档
-- [Quickstart](./docs/quickstart.md)
-- [FAQ](./docs/references/faq.md)
+- [Quickstart](https://ragflow.io/docs/dev/)
+- [User guide](https://ragflow.io/docs/dev/category/user-guides)
+- [Reference](https://ragflow.io/docs/dev/category/references)
+- [FAQ](https://ragflow.io/docs/dev/faq)
## 📜 路线图
@@ -290,6 +293,7 @@ $ systemctl start nginx
- [Discord](https://discord.gg/4XxujFgUN7)
- [Twitter](https://twitter.com/infiniflowai)
+- [GitHub Discussions](https://github.com/orgs/infiniflow/discussions)
## 🙌 贡献指南
diff --git a/deepdoc/parser/__init__.py b/deepdoc/parser/__init__.py
index 99ba946f9..dedd2c6ac 100644
--- a/deepdoc/parser/__init__.py
+++ b/deepdoc/parser/__init__.py
@@ -1,4 +1,15 @@
-
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
from .pdf_parser import RAGFlowPdfParser as PdfParser, PlainParser
from .docx_parser import RAGFlowDocxParser as DocxParser
diff --git a/deepdoc/parser/docx_parser.py b/deepdoc/parser/docx_parser.py
index 42923f0fc..8e13e2560 100644
--- a/deepdoc/parser/docx_parser.py
+++ b/deepdoc/parser/docx_parser.py
@@ -1,4 +1,16 @@
-# -*- coding: utf-8 -*-
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
from docx import Document
import re
import pandas as pd
diff --git a/deepdoc/parser/excel_parser.py b/deepdoc/parser/excel_parser.py
index 1016b115c..4bb509061 100644
--- a/deepdoc/parser/excel_parser.py
+++ b/deepdoc/parser/excel_parser.py
@@ -1,4 +1,16 @@
-# -*- coding: utf-8 -*-
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
from openpyxl import load_workbook
import sys
from io import BytesIO
diff --git a/deepdoc/parser/pdf_parser.py b/deepdoc/parser/pdf_parser.py
index 2971858c0..a33c71662 100644
--- a/deepdoc/parser/pdf_parser.py
+++ b/deepdoc/parser/pdf_parser.py
@@ -1,4 +1,16 @@
-# -*- coding: utf-8 -*-
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
import os
import random
diff --git a/deepdoc/parser/ppt_parser.py b/deepdoc/parser/ppt_parser.py
index 9b67336d0..65c7f63d4 100644
--- a/deepdoc/parser/ppt_parser.py
+++ b/deepdoc/parser/ppt_parser.py
@@ -10,6 +10,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
+
from io import BytesIO
from pptx import Presentation
diff --git a/deepdoc/parser/resume/__init__.py b/deepdoc/parser/resume/__init__.py
index 658268d03..8fe338dae 100644
--- a/deepdoc/parser/resume/__init__.py
+++ b/deepdoc/parser/resume/__init__.py
@@ -1,3 +1,16 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
import datetime
diff --git a/deepdoc/parser/resume/entities/corporations.py b/deepdoc/parser/resume/entities/corporations.py
index 54970866f..d653b3e43 100644
--- a/deepdoc/parser/resume/entities/corporations.py
+++ b/deepdoc/parser/resume/entities/corporations.py
@@ -1,3 +1,16 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
import re,json,os
import pandas as pd
from rag.nlp import rag_tokenizer
diff --git a/deepdoc/parser/resume/entities/degrees.py b/deepdoc/parser/resume/entities/degrees.py
index b9b15ef47..dc2d5bc17 100644
--- a/deepdoc/parser/resume/entities/degrees.py
+++ b/deepdoc/parser/resume/entities/degrees.py
@@ -1,3 +1,16 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
TBL = {"94":"EMBA",
"6":"MBA",
"95":"MPA",
diff --git a/deepdoc/parser/resume/entities/industries.py b/deepdoc/parser/resume/entities/industries.py
index f0fb19c07..9eeb10e55 100644
--- a/deepdoc/parser/resume/entities/industries.py
+++ b/deepdoc/parser/resume/entities/industries.py
@@ -1,3 +1,15 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
TBL = {"1":{"name":"IT/通信/电子","parent":"0"},
"2":{"name":"互联网","parent":"0"},
diff --git a/deepdoc/parser/resume/entities/regions.py b/deepdoc/parser/resume/entities/regions.py
index 54b655e6b..e1707530b 100644
--- a/deepdoc/parser/resume/entities/regions.py
+++ b/deepdoc/parser/resume/entities/regions.py
@@ -1,3 +1,16 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
TBL = {
"2":{"name":"北京","parent":"1"},
"3":{"name":"天津","parent":"1"},
diff --git a/deepdoc/parser/resume/entities/schools.py b/deepdoc/parser/resume/entities/schools.py
index e1abfa51b..d90d9fde0 100644
--- a/deepdoc/parser/resume/entities/schools.py
+++ b/deepdoc/parser/resume/entities/schools.py
@@ -1,4 +1,16 @@
-# -*- coding: UTF-8 -*-
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
import os, json,re,copy
import pandas as pd
current_file_path = os.path.dirname(os.path.abspath(__file__))
diff --git a/deepdoc/parser/resume/step_one.py b/deepdoc/parser/resume/step_one.py
index 7ae9839b4..34c474248 100644
--- a/deepdoc/parser/resume/step_one.py
+++ b/deepdoc/parser/resume/step_one.py
@@ -1,4 +1,16 @@
-# -*- coding: utf-8 -*-
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
import json
from deepdoc.parser.resume.entities import degrees, regions, industries
diff --git a/deepdoc/parser/resume/step_two.py b/deepdoc/parser/resume/step_two.py
index ff6116577..00282b7d6 100644
--- a/deepdoc/parser/resume/step_two.py
+++ b/deepdoc/parser/resume/step_two.py
@@ -1,4 +1,16 @@
-# -*- coding: utf-8 -*-
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
import re, copy, time, datetime, demjson3, \
traceback, signal
import numpy as np
diff --git a/deepdoc/vision/__init__.py b/deepdoc/vision/__init__.py
index a312a547f..46afe0127 100644
--- a/deepdoc/vision/__init__.py
+++ b/deepdoc/vision/__init__.py
@@ -1,3 +1,16 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
import pdfplumber
from .ocr import OCR
diff --git a/deepdoc/vision/postprocess.py b/deepdoc/vision/postprocess.py
index 6762b7111..198089c81 100644
--- a/deepdoc/vision/postprocess.py
+++ b/deepdoc/vision/postprocess.py
@@ -1,3 +1,16 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
import copy
import re
import numpy as np