import "cheerio";
import { CheerioWebBaseLoader } from "@langchain/community/document_loaders/web/cheerio";
import { RecursiveCharacterTextSplitter } from "langchain/text_splitter";
import { OpenAIEmbeddings } from "@langchain/openai";
import { MemoryVectorStore } from "langchain/vectorstores/memory";
import { createStuffDocumentsChain } from "langchain/chains/combine_documents";
import {
 ChatPromptTemplate,
 MessagesPlaceholder,
} from "@langchain/core/prompts";
import { ChatOpenAI } from "@langchain/openai";
import { AIMessage, HumanMessage } from "@langchain/core/messages";
import type { BaseMessage } from "@langchain/core/messages";
import {
 RunnablePassthrough,
 RunnableSequence,
} from "@langchain/core/runnables";
import { RunnableBranch } from "@langchain/core/runnables";
import { StringOutputParser } from "@langchain/core/output_parsers";

// Chat model shared by every chain in this script (temperature 0).
const llm = new ChatOpenAI({
 model: "gpt-3.5-turbo",
 temperature: 0,
});

// Download the web document that serves as the knowledge source.
const loader = new CheerioWebBaseLoader(
 "https://ja.wikipedia.org/wiki/LangChain"
);
const rawDocs = await loader.load();

// Split the loaded text into chunks (chunkSize 500, chunkOverlap 0).
const textSplitter = new RecursiveCharacterTextSplitter({
 chunkSize: 500,
 chunkOverlap: 0,
});
const allSplits = await textSplitter.splitDocuments(rawDocs);

// Build an in-memory vector store from the chunks, embedded with OpenAIEmbeddings.
const vectorstore = await MemoryVectorStore.fromDocuments(
 allSplits,
 new OpenAIEmbeddings()
);

// Retriever that fetches information from the vector store.
// NOTE(review): the argument 3 is presumably the number of documents returned
// per query — confirm against the asRetriever signature.
const retriever = vectorstore.asRetriever(3);

/**
 * System prompt for the question-answering step.
 *
 * It tells the model to answer only from the supplied context and to reply
 * with exactly 『わかりません』 when the context does not contain the answer.
 * The `{context}` placeholder is a template variable filled in when the
 * prompt is formatted.
 *
 * The example question is wrapped in a balanced 「…」 pair so that the quoted
 * example ends before the context heading instead of running into it.
 */
const SYSTEM_TEMPLATE = `# 指示
以下の質問に回答してください。質問に対する情報がコンテキストによって提供されない場合、または明確な情報源が存在しない場合は、『わかりません』とだけ回答してください。推測や創作はしないでください。

質問に対する情報が見つからない場合、必ず『わかりません』と回答してください。例えば、以下の質問に対してコンテキストに情報が含まれない場合です。

質問：「少年ジャンプで掲載されていた『ナルト』について教えて」

# コンテキスト
{context}
`;

// Question-answering prompt: the system template followed by the chat
// messages supplied under the "messages" key.
const questionAnsweringPrompt = ChatPromptTemplate.fromMessages([
 ["system", SYSTEM_TEMPLATE],
 new MessagesPlaceholder("messages"),
]);

// Create the document chain from the chat model and the prompt above.
// NOTE(review): createStuffDocumentsChain presumably fills {context} with the
// retrieved documents — confirm in the LangChain documentation.
const documentChain = await createStuffDocumentsChain({
 llm,
 prompt: questionAnsweringPrompt,
});

/**
 * Extracts the user's latest query from the chain input.
 *
 * @param params - Chain input holding the conversation messages.
 * @returns The content of the last message, or "" when there is no last message.
 */
const parseRetrieverInput = (params: { messages: BaseMessage[] }) => {
  const { messages } = params;
  const latest = messages[messages.length - 1];
  return latest ? latest.content : "";
};

// Chain combining the retriever and the document chain: `context` is filled by
// retrieving on the last message, then `answer` is produced by documentChain.
const retrievalChain = RunnablePassthrough.assign({
 context: RunnableSequence.from([parseRetrieverInput, retriever]),
}).assign({
 answer: documentChain,
});

// Single, self-contained question.
const result = await retrievalChain.invoke({
 messages: [new HumanMessage("LangChainのライセンス形式は？")],
});
console.log(result)

// Follow-up sent without any conversation history: only this text is used
// as the retrieval query.
const result2 = await retrievalChain.invoke({
 messages: [new HumanMessage("もっと教えて")],
});
console.log(result2);

// Prompt that asks the model to turn the conversation so far into a search query.
const queryTransformPrompt = ChatPromptTemplate.fromMessages([
 new MessagesPlaceholder("messages"),
 [
 "user",
 "上記の会話を踏まえ、会話に関連する情報を得るための検索クエリを生成してください。クエリのみを回答し、それ以外のことは書かないでください。",
 ],
]);
// Query-transformation chain on its own: the prompt piped into the chat model.
const queryTransformationChain = queryTransformPrompt.pipe(llm);
const result3 = await queryTransformationChain.invoke({
 messages: [
 new HumanMessage("LangChainのライセンス形式は？"),
 new AIMessage(
 "LangChainのライセンス形式はMITライセンスです。"
 ),
 new HumanMessage("もっと教えて"),
 ],
});
console.log(result3)

// Retrieval with branching: when there is exactly one message, retrieve on it
// directly; otherwise generate a search query from the conversation, parse it
// to a string, and retrieve on that.
const queryTransformingRetrieverChain = RunnableBranch.from([
 [
 (params: { messages: BaseMessage[] }) => params.messages.length === 1,
 RunnableSequence.from([parseRetrieverInput, retriever]),
 ],
 queryTransformPrompt.pipe(llm).pipe(new StringOutputParser()).pipe(retriever),
]).withConfig({ runName: "chat_retriever_chain" });

// Same shape as retrievalChain, but `context` comes from the branching
// retriever chain above.
const conversationalRetrievalChain = RunnablePassthrough.assign({
 context: queryTransformingRetrieverChain,
}).assign({
 answer: documentChain,
});

// One message: takes the direct-retrieval branch.
const result4 = await conversationalRetrievalChain.invoke({
 messages: [new HumanMessage("LangChainのライセンス形式は？")],
});
console.log(result4)

// Follow-up with history: takes the query-transformation branch.
const result5 = await conversationalRetrievalChain.invoke({
 messages: [
 new HumanMessage("LangChainのライセンス形式は？"),
 new AIMessage(
 "LangChainのライセンス形式はMITライセンスです。"
 ),
 new HumanMessage("もっと教えて"),
 ],
});
console.log(result5)

// Conversation that ends with a question on a different topic.
const result6 = await conversationalRetrievalChain.invoke({
 messages: [
 new HumanMessage("LangChainのライセンス形式は？"),
 new AIMessage(
 "LangChainのライセンス形式はMITライセンスです。"
 ),
 new HumanMessage("もっと教えて"),
 new HumanMessage("転生したらスライムだった件の作者は誰？")
 ],
});
console.log(result6)

ローカルで実行します。

$ pnpm vite-node demo05.ts

コードの解説

「もっと教えて」と LLM に問い合わせると、LLM は「もっと教えて」という質問に対して、ベクターストアから関連ドキュメントを取得し、取得したドキュメントをもとに回答を返します。LLM は以前のやり取りを記憶していないため、愚直に「もっと教えて」というテキストをベクターストアに問い合わせます。

システムテンプレートで、コンテキストに情報が含まれない場合は推測や創作をしないように制約をかけているため、「申し訳ありませんが、どのような情報を求めているのか具体的に教えていただけますか？具体的な質問やテーマがあればお答えできるかもしれません。」と回答されます。

LLM は、知らない情報については回答できません。

// Follow-up sent without any conversation history: the messages array holds
// only this one text.
const result2 = await retrievalChain.invoke({
 messages: [new HumanMessage("もっと教えて")],
});
console.log(result2);

この問題を解決するには、過去の会話履歴を含めて、LLM に問い合わせる必要があります。例えば、Retrieval Chain には連結しないで、過去の会話を LLM に渡すと、過去の会話をもとに回答を返すことができます。

// Prompt that asks the model to turn the conversation so far into a search query.
const queryTransformPrompt = ChatPromptTemplate.fromMessages([
 new MessagesPlaceholder("messages"),
 [
 "user",
 "上記の会話を踏まえ、会話に関連する情報を得るための検索クエリを生成してください。クエリのみを回答し、それ以外のことは書かないでください。",
 ],
]);
// Query-transformation chain on its own: the prompt piped into the chat model.
const queryTransformationChain = queryTransformPrompt.pipe(llm);
// Invoke it with a short conversation whose last message is a vague follow-up.
const result3 = await queryTransformationChain.invoke({
 messages: [
 new HumanMessage("LangChainのライセンス形式は？"),
 new AIMessage(
 "LangChainのライセンス形式はMITライセンスです。"
 ),
 new HumanMessage("もっと教えて"),
 ],
});
console.log(result3)

では、documentChain, retrievalChain に連結させます。これで、過去の会話をもとに回答を返すことができます。

// Retrieval with branching: when there is exactly one message, retrieve on it
// directly; otherwise generate a search query from the conversation, parse it
// to a string, and retrieve on that.
const queryTransformingRetrieverChain = RunnableBranch.from([
 [
 (params: { messages: BaseMessage[] }) => params.messages.length === 1,
 RunnableSequence.from([parseRetrieverInput, retriever]),
 ],
 queryTransformPrompt.pipe(llm).pipe(new StringOutputParser()).pipe(retriever),
]).withConfig({ runName: "chat_retriever_chain" });

// Assign the branching retriever's output to `context`, then produce `answer`
// with documentChain.
const conversationalRetrievalChain = RunnablePassthrough.assign({
 context: queryTransformingRetrieverChain,
}).assign({
 answer: documentChain,
});

まず、シンプルに問い合わせます。LangChain について教えてくれます。

// One message only, so the branch condition `messages.length === 1` holds.
const result4 = await conversationalRetrievalChain.invoke({
 messages: [new HumanMessage("LangChainのライセンス形式は？")],
});
console.log(result4)

さらに追加で「もっと教えて」と質問すると、「LangChain」の詳細な情報を教えてくれます。

// Follow-up sent together with the earlier question and answer (three messages).
const result5 = await conversationalRetrievalChain.invoke({
 messages: [
 new HumanMessage("LangChainのライセンス形式は？"),
 new AIMessage(
 "LangChainのライセンス形式はMITライセンスです。"
 ),
 new HumanMessage("もっと教えて"),
 ],
});
console.log(result5)

今度は別の質問をすると、LLM が持っている汎用的な知識から回答を返します。

// Same conversation, ending with a question on a different topic.
const result6 = await conversationalRetrievalChain.invoke({
 messages: [
 new HumanMessage("LangChainのライセンス形式は？"),
 new AIMessage(
 "LangChainのライセンス形式はMITライセンスです。"
 ),
 new HumanMessage("もっと教えて"),
 new HumanMessage("転生したらスライムだった件の作者は誰？")
 ],
});
console.log(result6)

Streaming

ここでは LLM の出力結果をストリーミング配信できるように変更します。

コードの作成

$ touch demo06.ts

demo06.ts

import "cheerio";
import { CheerioWebBaseLoader } from "@langchain/community/document_loaders/web/cheerio";
import { RecursiveCharacterTextSplitter } from "langchain/text_splitter";
import { OpenAIEmbeddings } from "@langchain/openai";
import { MemoryVectorStore } from "langchain/vectorstores/memory";
import { createStuffDocumentsChain } from "langchain/chains/combine_documents";
import {
 ChatPromptTemplate,
 MessagesPlaceholder,
} from "@langchain/core/prompts";
import { ChatOpenAI } from "@langchain/openai";
import { AIMessage, HumanMessage } from "@langchain/core/messages";
import type { BaseMessage } from "@langchain/core/messages";
import {
 RunnablePassthrough,
 RunnableSequence,
} from "@langchain/core/runnables";
import { RunnableBranch } from "@langchain/core/runnables";
import { StringOutputParser } from "@langchain/core/output_parsers";

// Chat model shared by every chain in this script (temperature 0).
const llm = new ChatOpenAI({
 model: "gpt-3.5-turbo",
 temperature: 0,
});

// Download the web document that serves as the knowledge source.
const loader = new CheerioWebBaseLoader(
 "https://ja.wikipedia.org/wiki/LangChain"
);
const rawDocs = await loader.load();

// Split the loaded text into chunks (chunkSize 500, chunkOverlap 0).
const textSplitter = new RecursiveCharacterTextSplitter({
 chunkSize: 500,
 chunkOverlap: 0,
});
const allSplits = await textSplitter.splitDocuments(rawDocs);

// Build an in-memory vector store from the chunks, embedded with OpenAIEmbeddings.
const vectorstore = await MemoryVectorStore.fromDocuments(
 allSplits,
 new OpenAIEmbeddings()
);

// Retriever that fetches information from the vector store.
// NOTE(review): the argument 3 is presumably the number of documents returned
// per query — confirm against the asRetriever signature.
const retriever = vectorstore.asRetriever(3);

/**
 * System prompt for the question-answering step.
 *
 * It tells the model to answer only from the supplied context and to reply
 * with exactly 『わかりません』 when the context does not contain the answer.
 * The `{context}` placeholder is a template variable filled in when the
 * prompt is formatted.
 *
 * The example question is wrapped in a balanced 「…」 pair so that the quoted
 * example ends before the context heading instead of running into it.
 */
const SYSTEM_TEMPLATE = `# 指示
以下の質問に回答してください。質問に対する情報がコンテキストによって提供されない場合、または明確な情報源が存在しない場合は、『わかりません』とだけ回答してください。推測や創作はしないでください。

質問に対する情報が見つからない場合、必ず『わかりません』と回答してください。例えば、以下の質問に対してコンテキストに情報が含まれない場合です。

質問：「少年ジャンプで掲載されていた『ナルト』について教えて」

# コンテキスト
{context}
`;

// Question-answering prompt: the system template followed by the chat
// messages supplied under the "messages" key.
const questionAnsweringPrompt = ChatPromptTemplate.fromMessages([
 ["system", SYSTEM_TEMPLATE],
 new MessagesPlaceholder("messages"),
]);

// Create the document chain from the chat model and the prompt above.
// NOTE(review): createStuffDocumentsChain presumably fills {context} with the
// retrieved documents — confirm in the LangChain documentation.
const documentChain = await createStuffDocumentsChain({
 llm,
 prompt: questionAnsweringPrompt,
});

/**
 * Extracts the user's latest query from the chain input.
 *
 * @param params - Chain input holding the conversation messages.
 * @returns The content of the last message, or "" when there is no last message.
 */
const parseRetrieverInput = (params: { messages: BaseMessage[] }) => {
  const { messages } = params;
  const latest = messages[messages.length - 1];
  return latest ? latest.content : "";
};

// Chain combining the retriever and the document chain: `context` is filled by
// retrieving on the last message, then `answer` is produced by documentChain.
const retrievalChain = RunnablePassthrough.assign({
 context: RunnableSequence.from([parseRetrieverInput, retriever]),
}).assign({
 answer: documentChain,
});

// Single, self-contained question.
const result = await retrievalChain.invoke({
 messages: [new HumanMessage("LangChainのライセンス形式は？")],
});
// console.log(result)

// Follow-up sent without any conversation history.
const result2 = await retrievalChain.invoke({
 messages: [new HumanMessage("もっと教えて")],
});
// console.log(result2);

// Prompt that asks the model to turn the conversation so far into a search query.
const queryTransformPrompt = ChatPromptTemplate.fromMessages([
 new MessagesPlaceholder("messages"),
 [
 "user",
 "上記の会話を踏まえ、会話に関連する情報を得るための検索クエリを生成してください。クエリのみを回答し、それ以外のことは書かないでください。",
 ],
]);
// Query-transformation chain on its own: the prompt piped into the chat model.
const queryTransformationChain = queryTransformPrompt.pipe(llm);
const result3 = await queryTransformationChain.invoke({
 messages: [
 new HumanMessage("LangChainのライセンス形式は？"),
 new AIMessage(
 "LangChainのライセンス形式はMITライセンスです。"
 ),
 new HumanMessage("もっと教えて"),
 ],
});
// console.log(result3)

// Retrieval with branching: when there is exactly one message, retrieve on it
// directly; otherwise generate a search query from the conversation, parse it
// to a string, and retrieve on that.
const queryTransformingRetrieverChain = RunnableBranch.from([
 [
 (params: { messages: BaseMessage[] }) => params.messages.length === 1,
 RunnableSequence.from([parseRetrieverInput, retriever]),
 ],
 queryTransformPrompt.pipe(llm).pipe(new StringOutputParser()).pipe(retriever),
]).withConfig({ runName: "chat_retriever_chain" });

// Same shape as retrievalChain, but `context` comes from the branching
// retriever chain above.
const conversationalRetrievalChain = RunnablePassthrough.assign({
 context: queryTransformingRetrieverChain,
}).assign({
 answer: documentChain,
});

// One message: takes the direct-retrieval branch.
const result4 = await conversationalRetrievalChain.invoke({
 messages: [new HumanMessage("LangChainのライセンス形式は？")],
});
// console.log(result4)

// Follow-up with history: takes the query-transformation branch.
const result5 = await conversationalRetrievalChain.invoke({
 messages: [
 new HumanMessage("LangChainのライセンス形式は？"),
 new AIMessage(
 "LangChainのライセンス形式はMITライセンスです。"
 ),
 new HumanMessage("もっと教えて"),
 ],
});
// console.log(result5)

// Conversation that ends with a question on a different topic.
const result6 = await conversationalRetrievalChain.invoke({
 messages: [
 new HumanMessage("LangChainのライセンス形式は？"),
 new AIMessage(
 "LangChainのライセンス形式はMITライセンスです。"
 ),
 new HumanMessage("もっと教えて"),
 new HumanMessage("転生したらスライムだった件の作者は誰？")
 ],
});
// console.log(result6)

// Call .stream() instead of .invoke() on the same chain; the awaited result is
// consumed with `for await` below.
const stream = await conversationalRetrievalChain.stream({
 messages: [
 new HumanMessage("Can LangSmith help test my LLM applications?"),
 new AIMessage(
 "Yes, LangSmith can help test and evaluate your LLM applications. It allows you to quickly edit examples and add them to datasets to expand the surface area of your evaluation sets or to fine-tune a model for improved quality or reduced costs. Additionally, LangSmith can be used to monitor your application, log all traces, visualize latency and token usage statistics, and troubleshoot specific issues as they arise."
 ),
 new HumanMessage("Tell me more!"),
 ],
});

// Log each chunk yielded by the stream.
for await (const chunk of stream) {
 console.log(chunk);
}

ローカルで実行します。

$ pnpm vite-node demo06.ts

コードの解説

LCEL で構成された chain は .stream を利用し出力結果をストリーミングできます。

// Call .stream() instead of .invoke() on the chain; the awaited result is
// consumed with `for await` below.
const stream = await conversationalRetrievalChain.stream({
 messages: [
 new HumanMessage("Can LangSmith help test my LLM applications?"),
 new AIMessage(
 "Yes, LangSmith can help test and evaluate your LLM applications. It allows you to quickly edit examples and add them to datasets to expand the surface area of your evaluation sets or to fine-tune a model for improved quality or reduced costs. Additionally, LangSmith can be used to monitor your application, log all traces, visualize latency and token usage statistics, and troubleshoot specific issues as they arise."
 ),
 new HumanMessage("Tell me more!"),
 ],
});

// Log each chunk yielded by the stream.
for await (const chunk of stream) {
 console.log(chunk);
}

さいごに

この記事では、公式のドキュメントを使いながら LangChain で外部から入力された情報を参照する方法を紹介しました。

作業リポジトリ

こちらが作業リポジトリです。

https://github.com/hayato94087/langchain-chatbots_retrieval-sample

Discussion

👁 Image

URL: https://zenn.dev/hayato94087/articles/bb858a6dabb286

⇱ LangChain で外部からデータを参照後編（Node.js）

はじめに

Query transformation

コードの作成

コードの解説

Streaming

コードの作成

コードの解説

さいごに

作業リポジトリ

Discussion

URL: https://zenn.dev/hayato94087/articles/bb858a6dabb286

⇱ LangChain で外部からデータを参照 後編（Node.js）

はじめに

Query transformation

コードの作成

コードの解説

Streaming

コードの作成

コードの解説

さいごに

作業リポジトリ

Discussion

⇱ LangChain で外部からデータを参照後編（Node.js）