Spaces:
Sleeping
Sleeping
| # Disclaimer: Please note that the information provided by this AI-based financial advisory system is for educational and informational purposes only. It does not constitute financial, legal, or tax advice, and should not be relied upon as such. | |
| import os | |
| import re | |
| import chromadb | |
| from chromadb.utils import embedding_functions | |
| from huggingface_hub import InferenceClient | |
| import gradio as gr | |
# Vector database
#-----------------
# Use the first database directory if it exists on this machine; otherwise
# fall back to the second path.
db_path = "/home/af/Desktop/gradio/Chroma/db"
if not os.path.exists(db_path):
    db_path = "/home/user/app/db"
print(db_path)

# The Chroma handle gets its own name so it is not confused with the
# inference `client` that is created further down.
chroma_client = chromadb.PersistentClient(path=db_path)
default_ef = embedding_functions.DefaultEmbeddingFunction()
sentence_transformer_ef = embedding_functions.DefaultEmbeddingFunction()

existing_collections = chroma_client.list_collections()
print(str(existing_collections))

# Compare exact collection names instead of searching the printed
# representation. list_collections() is handled whether it yields objects
# with a `.name` attribute or plain name strings.
existing_names = {getattr(entry, "name", entry) for entry in existing_collections}

if "FinancialAdvisorDB" in existing_names:
    print("FinancialAdvisorDB found!")
    collection = chroma_client.get_collection(
        name="FinancialAdvisorDB",
        embedding_function=sentence_transformer_ef,
    )
else:
    print("FinancialAdvisorDB created!")
    collection = chroma_client.create_collection(
        "FinancialAdvisorDB",
        embedding_function=sentence_transformer_ef,
        metadata={"hnsw:space": "cosine"},
    )
    # Seed the new collection with a small set of finance primers.
    seed_documents = [
        "Diversification is a key principle in investing. It involves spreading investments across various asset classes, sectors, and geographies to minimize risk.",
        "Compound interest is the interest calculated on the initial principal and the accumulated interest from previous periods. It can significantly grow investments over time.",
        "Dollar-cost averaging is an investment strategy where fixed amounts are invested at regular intervals, regardless of market conditions. This helps reduce the impact of volatility.",
        "A 401(k) is an employer-sponsored retirement savings plan. Employees can make pre-tax contributions, and employers may offer matching contributions.",
        "An IRA (Individual Retirement Account) is a tax-advantaged investment account individuals can use to save for retirement. There are two main types: Traditional and Roth IRAs.",
        "Mutual funds are investment vehicles that pool money from many investors to purchase securities like stocks and bonds. They offer diversification and professional management.",
        "ETFs (Exchange Traded Funds) are similar to mutual funds but trade on stock exchanges like individual stocks. They often have lower fees than mutual funds.",
        "Asset allocation refers to dividing an investment portfolio among different asset categories, like stocks, bonds, and cash. It should align with an investor's goals and risk tolerance.",
        "Rebalancing is the process of realigning the weightings of a portfolio to maintain the original asset allocation. It involves periodically selling and buying assets.",
        "Stocks represent ownership in a company. They can provide capital appreciation and dividend income, but also carry higher risk compared to other investments.",
        "Bonds are debt securities where investors lend money to an entity, which pays interest and returns the principal at maturity. They generally offer lower returns but less risk than stocks.",
    ]
    seed_ids = ["div1", "ci1", "dca1", "401k1", "ira1", "mf1", "etf1", "aa1", "rb1", "stock1", "bond1"]
    collection.add(
        documents=seed_documents,
        # One metadata dict per document; the count follows the list length.
        metadatas=[{"source": "Investopedia"} for _ in seed_documents],
        ids=seed_ids,
    )

print("Database ready!")
print(collection.count())

# Model
#-------
model_path = "mistralai/Mixtral-8x7B-Instruct-v0.1"
# `client` is the name the chat handler uses for text generation.
client = InferenceClient(model_path)
| # Gradio-GUI | |
| #------------ | |
def extend_prompt(message="", history=None, system=None, rag_addon=None, system2=None, charlimit=None, historylimit=4, remove_html=False):
    """Assemble one prompt string from system text, chat history and a new message.

    Args:
        message: The new user message. ``None`` omits the final user turn.
        history: Sequence of ``(user_message, bot_response)`` pairs, or
            ``None``. ``None`` entries inside a pair are treated as "".
        system: Optional system text, emitted as an opening user turn that
            the assistant acknowledges with "Okay.".
        rag_addon: Optional text appended to ``system``. If ``system`` is
            ``None`` the add-on alone becomes the system text.
        system2: Optional text appended verbatim after the final turn.
        charlimit: Maximum number of characters kept from each message and
            response; ``None`` means no truncation.
        historylimit: Number of most recent history pairs to include. Zero
            or a negative value includes none; ``None`` includes all.
        remove_html: If true, every ``<...>`` tag in a bot response is
            replaced by a newline.

    Returns:
        The assembled prompt string.
    """
    # NOTE(review): these turn markers look like the OpenChat format, while
    # the model configured in this file is Mixtral-Instruct - confirm the
    # template is the intended one.
    system_turn = "GPT4 Correct User: {system}<|end_of_turn|>GPT4 Correct Assistant: Okay.<|end_of_turn|>"
    user_turn = "GPT4 Correct User: {message}<|end_of_turn|>GPT4 Correct Assistant: "
    bot_turn = "{response}<|end_of_turn|>"

    # Matches a trailing collapsible block, with or without a leading <br>
    # and with or without attributes on the <details> tag, so that the
    # "Sources" block appended to streamed answers is stripped as well.
    details_block = r"\n\n(?:<br>)?<details(?:\s[^>]*)?>.*?</details>"
    html_tag = r"<.*?>"

    if rag_addon is not None:
        # Tolerate a missing system text instead of failing on None + str.
        system = (system or "") + rag_addon

    parts = []
    if system is not None:
        parts.append(system_turn.format(system=system))

    # A plain history[-historylimit:] would return the *whole* history for
    # historylimit == 0, so the "no history" case is handled explicitly.
    if history is None or (historylimit is not None and historylimit <= 0):
        recent_turns = []
    elif historylimit is None:
        recent_turns = history
    else:
        recent_turns = history[-historylimit:]

    for user_message, bot_response in recent_turns:
        user_message = "" if user_message is None else user_message
        bot_response = "" if bot_response is None else bot_response
        bot_response = re.sub(details_block, "", bot_response, flags=re.DOTALL)
        if remove_html:
            bot_response = re.sub(html_tag, "\n", bot_response)
        # Slicing with charlimit=None keeps the full text.
        parts.append(user_turn.format(message=user_message[:charlimit]))
        parts.append(bot_turn.format(response=bot_response[:charlimit]))

    if message is not None:
        parts.append(user_turn.format(message=message[:charlimit]))
    if system2 is not None:
        parts.append(system2)

    return "".join(parts)
def response(prompt, history, temperature=0.9, max_new_tokens=500, top_p=0.95, repetition_penalty=1.0):
    """Stream a retrieval-augmented answer to ``prompt``.

    The three nearest documents to ``prompt`` are fetched from the
    module-level ``collection``, added to the system text, and the
    resulting prompt is sent to the module-level inference ``client``.

    Args:
        prompt: The user's question.
        history: Chat history; accepted as a parameter but not used when
            building the prompt.
        temperature: Sampling temperature; values below 0.01 are raised
            to 0.01.
        max_new_tokens: Passed through to the generation call.
        top_p: Nucleus sampling parameter, converted to float.
        repetition_penalty: Passed through to the generation call.

    Yields:
        The answer text accumulated so far after each streamed token, and
        finally the full answer followed by an HTML "Sources" block
        listing the retrieved documents.
    """
    temperature = max(float(temperature), 1e-2)
    top_p = float(top_p)
    generate_kwargs = dict(
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        do_sample=True,
        seed=42,
    )

    hits = collection.query(
        query_texts=[prompt],
        n_results=3,
    )
    documents = hits['documents'][0]
    # NOTE(review): 1 - distance is a similarity score only for cosine
    # distance; the collection is created with "hnsw:space": "cosine"
    # elsewhere in this file - confirm for pre-existing databases.
    relevance_notes = ["<br><small>(relevance: " + str(round((1 - d) * 100) / 100) + ";" for d in hits['distances'][0]]
    source_notes = ["source: " + s["source"] + ")</small>" for s in hits['metadatas'][0]]
    citations = [' '.join(triplet) for triplet in zip(documents, relevance_notes, source_notes)]
    print(citations)

    # Feed the excerpts to the model whenever at least one was retrieved,
    # so that everything listed under "Sources" was also seen by the model.
    addon = ""
    if documents:
        addon = " Please consider the following excerpts from our database in your response, if they are relevant to answering the question. Provide a concise and accurate answer. Ignore irrelevant database excerpts WITHOUT commenting on, mentioning, or listing them:\n" + "\n".join(documents)

    system = "You are an AI-based financial advisory system that provides personalized advice and education on various financial topics." + addon + "\n\nUser Query:"
    formatted_prompt = extend_prompt(system + "\n" + prompt, None)
    stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)

    output = ""
    # The loop variable is deliberately not called `response`, which would
    # shadow this function's own name.
    for chunk in stream:
        output += chunk.token.text
        yield output

    output = output + "\n\n<br><details open><summary><strong>Sources</strong></summary><br><ul>" + "".join(["<li>" + s + "</li>" for s in citations]) + "</ul></details>"
    yield output
# Build the chat UI step by step: the chat window opens with a greeting
# from the assistant (the user side of that first turn is empty).
welcome_message = "Welcome! I am an AI-based financial advisory system. My role is to provide personalized advice and education on various financial topics.<br>Please feel free to ask me any questions you have about investing, retirement planning, or other money matters. I'll do my best to provide helpful guidance tailored to your needs."
chatbot = gr.Chatbot(
    value=[[None, welcome_message]],
    render_markdown=True,
)
demo = gr.ChatInterface(
    response,
    chatbot=chatbot,
    title="AI Financial Advisor with RAG",
)
demo.queue().launch(share=False)
# NOTE(review): launch() may block until the server stops, in which case
# this message is printed only afterwards - confirm.
print("Interface up and running!")