import os
from langchain_ibm import WatsonxLLM
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
import pandas as pd
import getpass
# Connection settings for the watsonx.ai endpoint; the API key is read
# interactively with getpass when this cell runs.
credentials = {
    "url": "https://us-south.ml.cloud.ibm.com",  # Replace with the correct region if needed
    "apikey": getpass.getpass("Please enter your WML API key (hit enter): "),
}

# Take the project id from the PROJECT_ID environment variable when it is
# set; otherwise ask for it on the console.
project_id = os.environ.get("PROJECT_ID")
if project_id is None:
    project_id = input("Please enter your project_id (hit enter): ")
def extract_text_from_txt(file_path):
    """Return the entire contents of the UTF-8 text file at ``file_path``."""
    with open(file_path, encoding="utf-8") as source:
        return source.read()
def create_vector_index(chunks, model_name="sentence-transformers/all-MiniLM-L6-v2"):
    """Build a FAISS vector store from text chunks.

    Args:
        chunks: Iterable of strings to embed and index.
        model_name: Name of the HuggingFace embedding model to load. The
            default is the model this function previously hard-coded.

    Returns:
        The vector store produced by ``FAISS.from_texts``.

    Raises:
        ValueError: If ``chunks`` contains no texts.
    """
    # Materialize once so generators work and emptiness can be checked.
    texts = list(chunks)
    if not texts:
        # Fail early with a clear message instead of letting the empty input
        # reach the embedding/indexing libraries.
        raise ValueError("Cannot build a vector index from an empty list of chunks.")
    embeddings = HuggingFaceEmbeddings(model_name=model_name)
    return FAISS.from_texts(texts, embeddings)
def query_index_with_granite_dynamic(vector_store, query, llm, k=3):
    """Answer ``query`` from retrieved context and report every intermediate step.

    Prints a step-by-step trace (Thought / Action / Action Input /
    Observation / Final Answer) while it runs, and returns the same pieces.

    Args:
        vector_store: Object exposing ``similarity_search(query, k=...)`` whose
            results carry a ``page_content`` attribute.
        query: The question to answer.
        llm: The language model. Its ``invoke`` method is used when present;
            otherwise the object itself is called with the prompt.
        k: Number of chunks to retrieve as context (default 3).

    Returns:
        dict with the keys "Thought", "Action", "Action Input",
        "Observation" and "Final Answer".
    """
    print("\n> Entering new AgentExecutor chain...")
    thought = f"The query '{query}' requires context from the book to provide an accurate response."
    print(f" Thought: {thought}")
    action = "Search FAISS Vector Store"
    print(f" Action: {action}")
    action_input = query
    print(f" Action Input: \"{action_input}\"")

    # Retrieve the k most similar chunks and join them into one context string.
    results = vector_store.similarity_search(query, k=k)
    observation = "\n".join(result.page_content for result in results)
    print(f" Observation:\n{observation}\n")

    # Ask the model to answer using only the retrieved context as background.
    prompt = f"Context:\n{observation}\n\nQuestion: {query}\nAnswer:"
    print(" Thought: Combining retrieved context with the query to generate a detailed answer.")
    # Calling the LLM object directly triggers a LangChain deprecation
    # warning; prefer ``invoke`` and keep the direct call only as a fallback
    # for plain callables.
    invoke = getattr(llm, "invoke", None)
    raw_answer = invoke(prompt) if callable(invoke) else llm(prompt)
    final_answer = raw_answer.strip()
    print(f" Final Answer: {final_answer}")
    print("\n> Finished chain.")

    return {
        "Thought": thought,
        "Action": action,
        "Action Input": action_input,
        "Observation": observation,
        "Final Answer": final_answer,
    }
def dynamic_output_to_dataframe(vector_store, queries, llm, csv_filename="output.csv"):
    """Run each query through the retrieval pipeline, tabulate the results, and save them.

    Every query is passed to ``query_index_with_granite_dynamic``; the
    returned dictionaries become the rows of a DataFrame that is printed and
    then written to ``csv_filename`` without the index column.
    """
    # One result dictionary per query, in the order the queries were given.
    records = [
        query_index_with_granite_dynamic(vector_store, question, llm)
        for question in queries
    ]
    results_table = pd.DataFrame(records)

    print("\nFinal DataFrame:")
    print(results_table)

    results_table.to_csv(csv_filename, index=False)
    print(f"\nOutput saved to {csv_filename}")
def main_workflow():
    """Run the full pipeline: read the book, index it, and answer the sample questions.

    NOTE(review): ``split_text_into_chunks`` and ``llm`` are used here but are
    not defined in the visible part of this file -- confirm both exist at
    module level before calling this function.
    """
    # Sample questions that are answered and written to the CSV.
    queries = [
        "What is the plot of 'A Scandal in Bohemia'?",
        "Who is Dr. Watson, and what role does he play in the stories?",
        "Describe the relationship between Sherlock Holmes and Irene Adler.",
        "What methods does Sherlock Holmes use to solve cases?"
    ]

    # Replace "aosh.txt" with your own text file.
    book_text = extract_text_from_txt("aosh.txt")
    passages = split_text_into_chunks(book_text)
    index = create_vector_index(passages)

    # Answer every question and save the collected output.
    dynamic_output_to_dataframe(index, queries, llm)
> Entering new AgentExecutor chain...
Thought: The query 'What is the plot of 'A Scandal in Bohemia'?' requires context from the book to provide an accurate response.
Action: Search FAISS Vector Store
Action Input: "What is the plot of 'A Scandal in Bohemia'?"
Observation:
I. A SCANDAL IN BOHEMIA
I.
“I was aware of it,” said Holmes dryly.
“The circumstances are of great delicacy, and every precaution has to
be taken to quench what might grow to be an immense scandal and
seriously compromise one of the reigning families of Europe. To speak
plainly, the matter implicates the great House of Ormstein, hereditary
kings of Bohemia.”
“I was also aware of that,” murmured Holmes, settling himself down in
his armchair and closing his eyes.
Contents
I. A Scandal in Bohemia
II. The Red-Headed League
III. A Case of Identity
IV. The Boscombe Valley Mystery
V. The Five Orange Pips
VI. The Man with the Twisted Lip
VII. The Adventure of the Blue Carbuncle
VIII. The Adventure of the Speckled Band
IX. The Adventure of the Engineer’s Thumb
X. The Adventure of the Noble Bachelor
XI. The Adventure of the Beryl Coronet
XII. The Adventure of the Copper Beeches
Thought: Combining retrieved context with the query to generate a detailed answer.
/var/folders/4w/smh16qdx6l98q0534hr9v52r0000gn/T/ipykernel_2648/234523588.py:23: LangChainDeprecationWarning: The method `BaseLLM.__call__` was deprecated in langchain-core 0.1.7 and will be removed in 1.0. Use :meth:`~invoke` instead.
final_answer = llm(prompt)
Final Answer: Step 1: Identify the main characters and their roles.
- Sherlock Holmes: The detective who is approached by a client with a delicate matter.
- An unnamed client: A representative of the great House of Ormstein, hereditary kings of Bohemia, who seeks Holmes' help to prevent a potential scandal.
Step 2: Understand the main issue or conflict.
- The main issue is a delicate matter that, if exposed, could lead to a massive scandal and compromise one of the reigning families of Europe, specifically the House of Ormstein.
Step 3: Ident
> Finished chain.
> Entering new AgentExecutor chain...
Thought: The query 'Who is Dr. Watson, and what role does he play in the stories?' requires context from the book to provide an accurate response.
Action: Search FAISS Vector Store
Action Input: "Who is Dr. Watson, and what role does he play in the stories?"
Observation:
“Sarasate plays at the St. James’s Hall this afternoon,” he remarked.
“What do you think, Watson? Could your patients spare you for a few
hours?”
“I have nothing to do to-day. My practice is never very absorbing.”
“Try the settee,” said Holmes, relapsing into his armchair and putting
his fingertips together, as was his custom when in judicial moods. “I
know, my dear Watson, that you share my love of all that is bizarre and
outside the conventions and humdrum routine of everyday life. You have
shown your relish for it by the enthusiasm which has prompted you to
chronicle, and, if you will excuse my saying so, somewhat to embellish
so many of my own little adventures.”
“My God! It’s Watson,” said he. He was in a pitiable state of reaction,
with every nerve in a twitter. “I say, Watson, what o’clock is it?”
“Nearly eleven.”
“Of what day?”
“Of Friday, June 19th.”
“Good heavens! I thought it was Wednesday. It is Wednesday. What d’you
want to frighten a chap for?” He sank his face onto his arms and began
to sob in a high treble key.
“I tell you that it is Friday, man. Your wife has been waiting this two
days for you. You should be ashamed of yourself!”
Thought: Combining retrieved context with the query to generate a detailed answer.
Final Answer: Dr. Watson is a character in the Sherlock Holmes stories, written by Sir Arthur Conan Doyle. He is a former military surgeon who becomes the narrator and chronicler of Holmes' adventures. Watson is a close friend and confidant of Holmes, often accompanying him on cases and providing a more human perspective to the stories. He is known for his enthusiasm for the bizarre and unconventional, as well as his skill in recording the details of their investigations. Watson's role is crucial in presenting the narrative and offering insights into Holmes' character and methods.
> Finished chain.
Final DataFrame:
Thought \
0 The query 'What is the plot of 'A Scandal in B...
1 The query 'Who is Dr. Watson, and what role do...
2 The query 'Describe the relationship between S...
3 The query 'What methods does Sherlock Holmes u...
Action \
0 Search FAISS Vector Store
1 Search FAISS Vector Store
2 Search FAISS Vector Store
3 Search FAISS Vector Store
Action Input \
0 What is the plot of 'A Scandal in Bohemia'?
1 Who is Dr. Watson, and what role does he play ...
2 Describe the relationship between Sherlock Hol...
3 What methods does Sherlock Holmes use to solve...
Observation \
0 I. A SCANDAL IN BOHEMIA\n\n\nI.\n“I was aware ...
1 “Sarasate plays at the St. James’s Hall this a...
2 “You have really got it!” he cried, grasping S...
3 to learn of the case was told me by Sherlock H...
Final Answer
0 Step 1: Identify the main characters and their...
1 Dr. Watson is a character in the Sherlock Holm...
2 Sherlock Holmes and Irene Adler have a profess...
3 Sherlock Holmes uses a variety of methods to s...
Output saved to output.csv
# Reload the results saved in output.csv (the default file name used by
# dynamic_output_to_dataframe) into a DataFrame. The module-level name `df`
# is what the visualization calls further down are given.
df = pd.read_csv("output.csv")
print(df.head())  # Display the first few rows
出力
Thought \
0 The query 'What is the plot of 'A Scandal in B...
1 The query 'Who is Dr. Watson, and what role do...
2 The query 'Describe the relationship between S...
3 The query 'What methods does Sherlock Holmes u...
Action \
0 Search FAISS Vector Store
1 Search FAISS Vector Store
2 Search FAISS Vector Store
3 Search FAISS Vector Store
Action Input \
0 What is the plot of 'A Scandal in Bohemia'?
1 Who is Dr. Watson, and what role does he play ...
2 Describe the relationship between Sherlock Hol...
3 What methods does Sherlock Holmes use to solve...
Observation \
0 I. A SCANDAL IN BOHEMIA\n\n\nI.\n“I was aware ...
1 “Sarasate plays at the St. James’s Hall this a...
2 “You have really got it!” he cried, grasping S...
3 to learn of the case was told me by Sherlock H...
Final Answer
0 Step 1: Identify the main characters and their...
1 Dr. Watson is a character in the Sherlock Holm...
2 Sherlock Holmes and Irene Adler have a profess...
3 Sherlock Holmes uses a variety of methods to s...
def _total_chars(column):
    """Return the summed character count of a text column, treating missing cells as empty."""
    # Empty strings written to CSV come back from read_csv as NaN; fill them
    # so the length computation cannot fail on a float.
    return int(column.fillna("").astype(str).str.len().sum())


def visualize_text_proportion(df):
    """Show a pie chart of how much text is in the observations versus the final answers.

    Args:
        df: DataFrame with "Observation" and "Final Answer" text columns.

    The two slices are the total character counts of the "Observation"
    column and of the "Final Answer" column (the latter is labelled
    "Remaining Text").

    NOTE(review): ``plt`` is presumably ``matplotlib.pyplot``; it is not
    imported in the visible part of this file -- confirm it is in scope.
    """
    observation_text_length = _total_chars(df["Observation"])
    final_answer_text_length = _total_chars(df["Final Answer"])
    sizes = [observation_text_length, final_answer_text_length]
    labels = ["Observation Text", "Remaining Text"]
    colors = ["#66b3ff", "#99ff99"]

    plt.figure(figsize=(4, 4))
    plt.pie(sizes, labels=labels, colors=colors, autopct="%1.1f%%", startangle=140)
    plt.title("Proportion of Text Used in Observations", fontsize=16)
    plt.show()


# Call the visualization function
visualize_text_proportion(df)
このコードは、テキスト内で最も頻繁に出現する単語を視覚的に表すために、Observation および Final Answer の2つのワードクラウドを生成します。
def generate_wordclouds_side_by_side(df):
    """Display word clouds for the observations and the final answers next to each other.

    Args:
        df: DataFrame with "Observation" and "Final Answer" text columns.

    NOTE(review): ``WordCloud`` and ``plt`` (presumably ``wordcloud.WordCloud``
    and ``matplotlib.pyplot``) are not imported in the visible part of this
    file -- confirm they are in scope.
    """
    # (column to read, title of its panel), in left-to-right order.
    panels = [
        ("Observation", "Word Cloud of Observations"),
        ("Final Answer", "Word Cloud of Final Answers"),
    ]

    # Build both clouds before any figure is created. Missing cells (NaN
    # after a CSV round-trip of empty strings) are replaced with "" so that
    # str.join does not fail on a float.
    clouds = [
        (
            title,
            WordCloud(width=800, height=400, background_color="white").generate(
                " ".join(df[column].fillna("").astype(str))
            ),
        )
        for column, title in panels
    ]

    # One row, two columns: one panel per word cloud.
    plt.figure(figsize=(16, 8))
    for position, (title, cloud) in enumerate(clouds, start=1):
        plt.subplot(1, 2, position)
        plt.imshow(cloud, interpolation="bilinear")
        plt.axis("off")
        plt.title(title, fontsize=16)
    plt.tight_layout()
    plt.show()


# Call the function to generate and display the word clouds
generate_wordclouds_side_by_side(df)
from sklearn.feature_extraction.text import CountVectorizer
from nltk.translate.bleu_score import sentence_bleu
from sklearn.metrics import precision_score, recall_score