Notebook: Chatting with NVIDIA Financial Reports
In this notebook, we build a simple RAG example for chatting with NVIDIA financial reports, using Milvus as the vector store and the mixtral_8x7b LLM and ai-embed-qa-4 embedding model, both served through NVIDIA AI Endpoints.
NVIDIA financial reports are publicly available on nvidianews.
Below is an example of a financial report for the first quarter of Fiscal Year 2024:
https://nvidianews.nvidia.com/news/nvidia-announces-financial-results-for-first-quarter-fiscal-2024
Step 1 - Export the NVIDIA_API_KEY
Supply your NVIDIA_API_KEY when you run the cell below.
import getpass
import os
if os.environ.get("NVIDIA_API_KEY", "").startswith("nvapi-"):
    print("Valid NVIDIA_API_KEY already in environment. Delete to reset")
    nvapi_key = os.environ["NVIDIA_API_KEY"]  # reuse the existing key so later cells can reference it
else:
    nvapi_key = getpass.getpass("NVAPI Key (starts with nvapi-): ")
    assert nvapi_key.startswith("nvapi-"), f"{nvapi_key[:5]}... is not a valid key"
    os.environ["NVIDIA_API_KEY"] = nvapi_key
Step 2 - Initialize the LLM and Embedding Model
Here we will use mixtral_8x7b as the LLM and ai-embed-qa-4 as the embedding model.
# test run to check that you can generate a response successfully
from langchain_nvidia_ai_endpoints import ChatNVIDIA, NVIDIAEmbeddings

llm = ChatNVIDIA(model="ai-mixtral-8x7b-instruct", nvidia_api_key=nvapi_key, max_tokens=1024)

# separate embedders for documents (passages) and queries
embedder_document = NVIDIAEmbeddings(model="ai-embed-qa-4", model_type="passage")
embedder_query = NVIDIAEmbeddings(model="ai-embed-qa-4", model_type="query")
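As a quick smoke test (a minimal sketch; the exact reply text will vary), invoke the LLM once and confirm that a response comes back:
# hypothetical smoke test: send a trivial prompt and print the reply
test_response = llm.invoke("Write a one-sentence greeting.")
print(test_response.content)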
Step 3 - Ingest HTML Files
3.1 Download HTML pages covering financial reports from Fiscal Year 2020 to Fiscal Year 2024
import requests

urls_content = []
url_template1 = "https://nvidianews.nvidia.com/news/nvidia-announces-financial-results-for-{quarter}-quarter-fiscal-{year}"
url_template2 = "https://nvidianews.nvidia.com/news/nvidia-announces-financial-results-for-{quarter}-quarter-and-fiscal-{year}"

for quarter in ["first", "second", "third", "fourth"]:
    for year in range(2020, 2025):
        args = {"quarter": quarter, "year": str(year)}
        # fourth-quarter announcements also cover the full fiscal year, so they use a different URL pattern
        if quarter == "fourth":
            urls_content.append(requests.get(url_template2.format(**args)).content)
        else:
            urls_content.append(requests.get(url_template1.format(**args)).content)
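Optionally, a quick check (an illustrative snippet, not required for the rest of the notebook) confirms that all 20 pages were fetched and that none came back empty:
# 4 quarters x 5 fiscal years should yield 20 pages
print(f"downloaded {len(urls_content)} pages")
print(f"smallest page size: {min(len(c) for c in urls_content)} bytes")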
3.2 Parse HTML files
# extract the url, title, text content, and tables from the html
from bs4 import BeautifulSoup
import markdownify

def extract_url_title_time(soup):
    url = ""
    title = ""
    tables = []
    try:
        if soup.find("title"):
            title = str(soup.find("title").string)
        og_url_meta = soup.find("meta", property="og:url")
        if og_url_meta:
            url = og_url_meta.get("content", "")
        # convert each table to markdown, then remove it from the soup so it is not duplicated in the plain text
        for table in soup.find_all("table"):
            tables.append(markdownify.markdownify(str(table)))
            table.decompose()
        text_content = soup.get_text(separator=' ', strip=True)
        text_content = ' '.join(text_content.split())
        return url, title, text_content, tables
    except Exception as e:
        print(f"parse error: {e}")
        return "", "", "", []
parsed_htmls = []
for url_content in urls_content:
    soup = BeautifulSoup(url_content, 'html.parser')
    url, title, content, tables = extract_url_title_time(soup)
    parsed_htmls.append({"url": url, "title": title, "content": content, "tables": tables})
3.3 Summarize tables
# summarize tables
def get_table_summary(table, title, llm):
    res = ""
    try:
        prompt = f"""
[INST] You are a virtual assistant. Your task is to understand the content of TABLE in the markdown format.
TABLE is from "{title}". Summarize the information in TABLE into SUMMARY. SUMMARY MUST be concise. Return SUMMARY only and nothing else.
TABLE: ```{table}```
Summary:
[/INST]
"""
        result = llm.invoke(prompt)
        res = result.content
    except Exception as e:
        print(f"Error: {e} while getting table summary from LLM")
        if not os.getenv("NVIDIA_API_KEY", False):
            print("NVIDIA_API_KEY not set")
    finally:
        return res
for parsed_item in parsed_htmls:
    title = parsed_item['title']
    for idx, table in enumerate(parsed_item['tables']):
        print(f"parsing tables in {title}...")
        parsed_item['tables'][idx] = get_table_summary(table, title, llm)
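To spot-check the result (an optional snippet, assuming the first report contains at least one table), print one of the generated summaries:
# print the summary produced for the first table of the first report
print(parsed_htmls[0]['tables'][0])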
3.4 Split the text/tables into chunks, compute an embedding for each chunk, and store the embeddings in the Milvus vector database
from langchain.docstore.document import Document
from langchain.text_splitter import SentenceTransformersTokenTextSplitter

TEXT_SPLITTER_MODEL = "intfloat/e5-large-v2"
TEXT_SPLITTER_CHUNK_SIZE = 200
TEXT_SPLITTER_CHUNK_OVERLAP = 50

text_splitter = SentenceTransformersTokenTextSplitter(
    model_name=TEXT_SPLITTER_MODEL,
    tokens_per_chunk=TEXT_SPLITTER_CHUNK_SIZE,
    chunk_overlap=TEXT_SPLITTER_CHUNK_OVERLAP,
)

documents = []
for parsed_item in parsed_htmls:
    title = parsed_item['title']
    url = parsed_item['url']
    text_content = parsed_item['content']
    documents.append(Document(page_content=text_content, metadata={'title': title, 'url': url}))
    for table in parsed_item['tables']:
        documents.append(Document(page_content=table, metadata={'title': title, 'url': url}))

documents = text_splitter.split_documents(documents)
print(f"obtained {len(documents)} chunks")
COLLECTION_NAME = "NVIDIA_Finance"

from langchain.vectorstores import Milvus

vectorstore = Milvus(
    embedding_function=embedder_document,
    collection_name=COLLECTION_NAME,
    connection_args={
        "host": "milvus",
        "port": "19530"},
    drop_old=True,
    auto_id=True
)
vectorstore.add_documents(documents)
docs = vectorstore.similarity_search("what are 2024 Q3 revenues?")
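To see what the retriever actually returned (an illustrative check; the content of docs depends on what was ingested), print the top match and its metadata:
# inspect the first retrieved chunk and the page it came from
print(docs[0].page_content[:300])
print(docs[0].metadata)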
Step 4 - Retrieve and Generate Answer
from langchain.prompts.prompt import PromptTemplate
PROMPT_TEMPLATE = """[INST]You are a friendly virtual assistant and maintain a conversational, polite, patient, friendly and gender neutral tone throughout the conversation.
Your task is to understand the QUESTION, read the Content list from the DOCUMENT delimited by ```, generate an answer based on the Content, and provide references used in answering the question in the format "[Title](URL)".
Do not depend on outside knowledge or fabricate responses.
DOCUMENT: ```{context}```
Your response should follow these steps:
1. The answer should be short, concise, and clear.
* If detailed instructions are required, present them in an ordered list or bullet points.
2. If the answer to the question is not available in the provided DOCUMENT, ONLY respond that you couldn't find any information related to the QUESTION, and do not show references and citations.
3. Citation
* ALWAYS start the citation section with "Here are the sources used to generate this response." and follow with references in markdown link format [Title](URL) to support the answer.
* Use Bullets to display the reference [Title](URL).
* You MUST ONLY use the URL extracted from the DOCUMENT as the reference link. DO NOT fabricate or use any link outside the DOCUMENT as reference.
* Avoid over-citation. Only include references that were directly used in generating the response.
* If no reference URL can be provided, remove the entire citation section.
* The Citation section can include one or more references. DO NOT include same URL as multiple references. ALWAYS append the citation section at the end of your response.
* You MUST follow the below format as an example for this citation section:
Here are the sources used to generate this response:
* [Title](URL)
[/INST]
[INST]
QUESTION: {question}
FINAL ANSWER:[/INST]"""
prompt_template = PromptTemplate(template=PROMPT_TEMPLATE, input_variables=["context", "question"])
def build_context(chunks):
    context = ""
    for chunk in chunks:
        context += "\n Content: " + chunk.page_content + " | Title: (" + chunk.metadata["title"] + ") | URL: (" + chunk.metadata.get("url", "source") + ")"
    return context
def generate_answer(llm, vectorstore, prompt_template, question):
    retrieved_chunks = vectorstore.similarity_search(question)
    context = build_context(retrieved_chunks)
    args = {"context": context, "question": question}
    prompt = prompt_template.format(**args)
    ans = llm.invoke(prompt)
    return ans.content
question = "what are 2024 Q1 revenues?"
generate_answer(llm, vectorstore, prompt_template, question)
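The same helper can be reused for other questions (an illustrative follow-up; the exact wording of the answer will vary between runs):
# ask about a different quarter and print the answer along with its citations
answer = generate_answer(llm, vectorstore, prompt_template, "what are 2024 Q4 revenues?")
print(answer)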