Upload 12 files
- app (10).py +109 -0
- browser (2).py +317 -0
- chat_with_bot (2).py +278 -0
- cookies (2).py +713 -0
- llm_engine (2).py +179 -0
- mdconvert (2).py +659 -0
- requirements (93).txt +32 -0
- rwkv_cpp_model (2).py +388 -0
- rwkv_cpp_shared_library (2).py +450 -0
- rwkv_world_tokenizer (2).py +126 -0
- sampling (2).py +52 -0
- tokenizer_util (2).py +38 -0
app (10).py
ADDED
@@ -0,0 +1,109 @@
from huggingface_hub import login, InferenceClient
import os, gc, time, random, datetime, json, re

HF_TOKEN = os.getenv('HF_TOKEN')
SERP_API_KEY = os.getenv('SERP_KEY')
login(token=HF_TOKEN)

import gradio as gr
from transformers import CodeAgent, Tool, ToolCollection, load_tool, ReactCodeAgent, ReactJsonAgent
from transformers.agents import PythonInterpreterTool
from langchain.memory import ConversationBufferMemory
import bs4
import requests
from llm_engine import HfEngine
import datasets
import spaces
import tqdm
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.vectorstores import VectorStore
from transformers.agents.prompts import DEFAULT_REACT_CODE_SYSTEM_PROMPT, DEFAULT_REACT_JSON_SYSTEM_PROMPT
from transformers.agents.default_tools import Tool, PythonInterpreterTool
from duckduckgo_search import DDGS
from web_surfer import (SearchInformationTool, NavigationalSearchTool, VisitTool, DownloadTool, PageUpTool, PageDownTool, FinderTool, FindNextTool, ArchiveSearchTool,)
from mdconvert import MarkdownConverter
from visual_qa import VisualQATool, VisualQAGPT4Tool


def search_ducky(query):
    # Plain DuckDuckGo text search; concatenates the result snippets.
    with DDGS() as ddgs:
        results = list(ddgs.text(query, max_results=10))
        content = ''
        if results:
            for result in results:
                content += result['body']
        return content


# Build the knowledge base: embed the first 1000 chunks of m-ric/huggingface_doc into FAISS.
knowledge_base = datasets.load_dataset("m-ric/huggingface_doc", split="train")
source_docs = [Document(page_content=doc["text"], metadata={"source": doc["source"].split("/")[1]}) for doc in knowledge_base]
docs_processed = RecursiveCharacterTextSplitter(chunk_size=500).split_documents(source_docs)[:1000]
embedding_model = HuggingFaceEmbeddings(model_name="thenlper/gte-small")
vectordb = FAISS.from_documents(documents=docs_processed, embedding=embedding_model)
all_sources = list(set([doc.metadata["source"] for doc in docs_processed]))
print(all_sources)


class RetrieverTool(Tool):
    name = "retriever"
    description = "Retrieves some documents from the knowledge base that have the closest embeddings to the input query."
    inputs = {
        "query": {
            "type": "text",
            "description": "The query to perform. This should be semantically close to your target documents. Use the affirmative form rather than a question.",
        },
        "source": {
            "type": "text",
            "description": "",
        },
    }
    output_type = "text"

    def __init__(self, vectordb: VectorStore, all_sources: str, **kwargs):
        super().__init__(**kwargs)
        self.vectordb = vectordb
        self.inputs["source"]["description"] = (
            f"The source of the documents to search, as a str representation of a list. Possible values in the list are: {all_sources}. If this argument is not provided, all sources will be searched."
        )

    def forward(self, query: str, source: str = None) -> str:
        assert isinstance(query, str), "Your search query must be a string"

        if source:
            if isinstance(source, str) and "[" not in str(source):  # if the source is not representing a list
                source = [source]
            source = json.loads(str(source).replace("'", '"'))

        docs = self.vectordb.similarity_search(query, filter=({"source": source} if source else None), k=3)

        if len(docs) == 0:
            return "No documents found with this filtering. Try removing the source filter."
        return "Retrieved documents:\n\n" + "\n===Document===\n".join([doc.page_content for doc in docs])


memory = ConversationBufferMemory(memory_key="chat_history")
llm_engine = HfEngine(model="Jopmt/JoPmt")
##gradio_prompt_generator_tool = StableDiffusionPromptGeneratorTool()
##prompt_generator_tool = Tool.from_gradio(gradio_prompt_generator_tool)
##tools = [StableDiffusionTool().langchain, ImageCaptioningTool().langchain, StableDiffusionPromptGeneratorTool().langchain, TextToVideoTool().langchain]
##tools=[prompt_generator_tool(), image_generation_tool(), PythonInterpreterTool()]


class SearchTool(Tool):
    name = "ask_search_agent"
    description = "A search agent that will browse the internet to answer a question. Use it to gather information, not for problem-solving."

    inputs = {
        "question": {
            "description": "Your question, as a natural language sentence. You are talking to an agent, so provide them with as much context as possible.",
            "type": "text",
        }
    }
    output_type = "text"

    def forward(self, question: str) -> str:
        return websurfer_agent.run(question)


tools = [PythonInterpreterTool(), SearchTool(), RetrieverTool(vectordb, all_sources)]
additional_authorized_imports = ['requests', 'bs4', 'os', 'time', 'datetime', 'json', 're']
WEB_TOOLS = [SearchInformationTool(), NavigationalSearchTool(), VisitTool(), DownloadTool(), PageUpTool(), PageDownTool(), FinderTool(), FindNextTool(), ArchiveSearchTool()]
websurfer_agent = ReactJsonAgent(tools=WEB_TOOLS, llm_engine=llm_engine, add_base_tools=True, max_iterations=1)
reagent = ReactCodeAgent(tools=tools, llm_engine=llm_engine, add_base_tools=True, max_iterations=1, additional_authorized_imports=additional_authorized_imports)


def plix(inut, progress=gr.Progress(track_tqdm=True)):
    goose = reagent.run(inut)
    return goose


with gr.Blocks(theme=random.choice([gr.themes.Monochrome(), gr.themes.Base.from_hub("gradio/seafoam"), gr.themes.Base.from_hub("freddyaboulton/dracula_revamped"), gr.themes.Glass(), gr.themes.Base()]), analytics_enabled=False) as iface:
    out = gr.Textbox(label="🤗Output", lines=5, interactive=False)
    inut = gr.Textbox(label="Prompt")
    btn = gr.Button("GENERATE")
    btn.click(fn=plix, inputs=inut, outputs=out)
iface.queue(max_size=1, api_open=False)
iface.launch(max_threads=20, inline=False, show_api=False)
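Taken together, app (10).py embeds the documentation corpus into FAISS and hands the index to a ReactCodeAgent through RetrieverTool. A minimal sketch of exercising the tool directly, assuming the `vectordb` and `all_sources` objects built above (the query strings are illustrative, not from the source):

# Sketch: query RetrieverTool outside the agent loop, using the objects built in app (10).py.
retriever = RetrieverTool(vectordb, all_sources)

# Unfiltered search across all sources.
print(retriever.forward("How do I push a model to the Hugging Face Hub?"))

# Filtered search: `source` is the str representation of a list, which
# forward() normalizes via json.loads before building the FAISS filter.
print(retriever.forward("push a model to the Hub", source=str(all_sources[:1])))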
browser (2).py
ADDED
@@ -0,0 +1,317 @@
# Shamelessly stolen from Microsoft Autogen team: thanks to them for this great resource!
# https://github.com/microsoft/autogen/blob/gaia_multiagent_v01_march_1st/autogen/browser_utils.py
import json
import os
import requests
import re
import io
import uuid
import mimetypes
import time
import pathlib
import pathvalidate
from urllib.parse import urljoin, urlparse, unquote, parse_qs
from urllib.request import url2pathname
from typing import Any, Dict, List, Optional, Union, Tuple
from mdconvert import MarkdownConverter, UnsupportedFormatException, FileConversionException
from serpapi import GoogleSearch
from cookies import COOKIES
from duckduckgo_search import DDGS


class SimpleTextBrowser:
    """(In preview) An extremely simple text-based web browser comparable to Lynx. Suitable for Agentic use."""

    def __init__(
        self,
        start_page: Optional[str] = None,
        viewport_size: Optional[int] = 1024 * 8,
        downloads_folder: Optional[Union[str, None]] = None,
        ##serpapi_key: Optional[Union[str, None]] = None,
        request_kwargs: Optional[Union[Dict[str, Any], None]] = None,
    ):
        self.start_page: str = start_page if start_page else "about:blank"
        self.viewport_size = viewport_size  # Applies only to the standard uri types
        self.downloads_folder = downloads_folder
        self.history: List[Tuple[str, float]] = list()
        self.page_title: Optional[str] = None
        self.viewport_current_page = 0
        self.viewport_pages: List[Tuple[int, int]] = list()
        self.set_address(self.start_page)
        ##self.serpapi_key = serpapi_key
        # Guard against the default None so the cookie injection below cannot crash.
        self.request_kwargs = request_kwargs if request_kwargs is not None else {}
        self.request_kwargs["cookies"] = COOKIES
        self._mdconvert = MarkdownConverter()
        self._page_content: str = ""

        self._find_on_page_query: Union[str, None] = None
        self._find_on_page_last_result: Union[int, None] = None  # Location of the last result

    @property
    def address(self) -> str:
        """Return the address of the current page."""
        return self.history[-1][0]

    def set_address(self, uri_or_path: str) -> None:
        # TODO: Handle anchors
        self.history.append((uri_or_path, time.time()))

        # Handle special URIs
        if uri_or_path == "about:blank":
            self._set_page_content("")
        elif uri_or_path.startswith("google:"):
            self._serpapi_search(uri_or_path[len("google:"):].strip())
        else:
            if (
                not uri_or_path.startswith("http:")
                and not uri_or_path.startswith("https:")
                and not uri_or_path.startswith("file:")
            ):
                if len(self.history) > 1:
                    prior_address = self.history[-2][0]
                    uri_or_path = urljoin(prior_address, uri_or_path)
                    # Update the address with the fully-qualified path
                    self.history[-1] = (uri_or_path, self.history[-1][1])
            self._fetch_page(uri_or_path)

        self.viewport_current_page = 0
        self._find_on_page_query = None
        self._find_on_page_last_result = None

    @property
    def viewport(self) -> str:
        """Return the content of the current viewport."""
        bounds = self.viewport_pages[self.viewport_current_page]
        return self.page_content[bounds[0] : bounds[1]]

    @property
    def page_content(self) -> str:
        """Return the full contents of the current page."""
        return self._page_content

    def _set_page_content(self, content: str) -> None:
        """Sets the text content of the current page."""
        self._page_content = content
        self._split_pages()
        if self.viewport_current_page >= len(self.viewport_pages):
            self.viewport_current_page = len(self.viewport_pages) - 1

    def page_down(self) -> None:
        self.viewport_current_page = min(self.viewport_current_page + 1, len(self.viewport_pages) - 1)

    def page_up(self) -> None:
        self.viewport_current_page = max(self.viewport_current_page - 1, 0)

    def find_on_page(self, query: str) -> Union[str, None]:
        """Searches for the query from the current viewport forward, looping back to the start if necessary."""

        # Did we get here via a previous find_on_page search with the same query?
        # If so, map to find_next
        if query == self._find_on_page_query and self.viewport_current_page == self._find_on_page_last_result:
            return self.find_next()

        # Ok it's a new search start from the current viewport
        self._find_on_page_query = query
        viewport_match = self._find_next_viewport(query, self.viewport_current_page)
        if viewport_match is None:
            self._find_on_page_last_result = None
            return None
        else:
            self.viewport_current_page = viewport_match
            self._find_on_page_last_result = viewport_match
            return self.viewport

    def find_next(self) -> Union[str, None]:
        """Scroll to the next viewport that matches the query"""

        if self._find_on_page_query is None:
            return None

        starting_viewport = self._find_on_page_last_result
        if starting_viewport is None:
            starting_viewport = 0
        else:
            starting_viewport += 1
            if starting_viewport >= len(self.viewport_pages):
                starting_viewport = 0

        viewport_match = self._find_next_viewport(self._find_on_page_query, starting_viewport)
        if viewport_match is None:
            self._find_on_page_last_result = None
            return None
        else:
            self.viewport_current_page = viewport_match
            self._find_on_page_last_result = viewport_match
            return self.viewport

    def _find_next_viewport(self, query: str, starting_viewport: int) -> Union[int, None]:
        """Search for matches between the starting viewport looping when reaching the end."""

        if query is None:
            return None

        # Normalize the query, and convert to a regular expression
        nquery = re.sub(r"\*", "__STAR__", query)
        nquery = " " + (" ".join(re.split(r"\W+", nquery))).strip() + " "
        nquery = nquery.replace(" __STAR__ ", "__STAR__ ")  # Merge isolated stars with prior word
        nquery = nquery.replace("__STAR__", ".*").lower()

        if nquery.strip() == "":
            return None

        idxs = list()
        idxs.extend(range(starting_viewport, len(self.viewport_pages)))
        idxs.extend(range(0, starting_viewport))

        for i in idxs:
            bounds = self.viewport_pages[i]
            content = self.page_content[bounds[0] : bounds[1]]

            # TODO: Remove markdown links and images
            ncontent = " " + (" ".join(re.split(r"\W+", content))).strip().lower() + " "
            if re.search(nquery, ncontent):
                return i

        return None

    def visit_page(self, path_or_uri: str) -> str:
        self.set_address(path_or_uri)
        return self.viewport

    def _split_pages(self) -> None:
        # Do not split search results
        if self.address.startswith("google:"):
            self.viewport_pages = [(0, len(self._page_content))]
            return

        # Handle empty pages
        if len(self._page_content) == 0:
            self.viewport_pages = [(0, 0)]
            return

        # Break the viewport into pages
        self.viewport_pages = []
        start_idx = 0
        while start_idx < len(self._page_content):
            end_idx = min(start_idx + self.viewport_size, len(self._page_content))  # type: ignore[operator]
            # Adjust to end on a space
            while end_idx < len(self._page_content) and self._page_content[end_idx - 1] not in [" ", "\t", "\r", "\n"]:
                end_idx += 1
            self.viewport_pages.append((start_idx, end_idx))
            start_idx = end_idx

    def _prev_visit(self, url: str) -> str:
        # NOTE: this helper is referenced by _serpapi_search but was missing from the
        # uploaded file; restored here as a minimal implementation (an assumption,
        # modeled on the upstream browser_utils version).
        for i in range(len(self.history) - 1, -1, -1):
            if self.history[i][0] == url:
                return f"You previously visited this page {round(time.time() - self.history[i][1])} seconds ago.\n"
        return ""

    def _serpapi_search(self, query: str, filter_year: Optional[int] = None) -> None:
        # Despite the name, this now queries DuckDuckGo (DDGS) rather than SerpAPI.
        with DDGS() as ddgs:
            results = list(ddgs.text(query, max_results=10))

        self.page_title = f"{query} - Search"

        if not results:
            year_filter_message = f" with filter year={filter_year}" if filter_year is not None else ""
            self._set_page_content(f"No results found for '{query}'{year_filter_message}. Try with a more general query, or remove the year filter.")
            return

        web_snippets: List[str] = list()
        for idx, page in enumerate(results, 1):
            snippet = f"\n{page['body']}" if 'body' in page else ""
            redacted_version = f"{idx}. [{page['title']}]({page['href']})\n{self._prev_visit(page['href'])}{snippet}"
            web_snippets.append(redacted_version)

        content = (f"A search for '{query}' found {len(web_snippets)} results:\n\n## Web Results\n" +
                   "\n\n".join(web_snippets))
        self._set_page_content(content)

    def _fetch_page(self, url: str) -> None:
        download_path = ""
        try:
            if url.startswith("file://"):
                download_path = os.path.normcase(os.path.normpath(unquote(url[7:])))
                res = self._mdconvert.convert_local(download_path)
                self.page_title = res.title
                self._set_page_content(res.text_content)
            else:
                # Prepare the request parameters
                request_kwargs = self.request_kwargs.copy() if self.request_kwargs is not None else {}
                request_kwargs["stream"] = True

                # Send a HTTP request to the URL
                response = requests.get(url, **request_kwargs)
                response.raise_for_status()

                # If the HTTP request was successful
                content_type = response.headers.get("content-type", "")

                # Text or HTML
                if "text/" in content_type.lower():
                    res = self._mdconvert.convert_response(response)
                    self.page_title = res.title
                    self._set_page_content(res.text_content)
                # A download
                else:
                    # Try producing a safe filename
                    fname = None
                    download_path = None
                    try:
                        fname = pathvalidate.sanitize_filename(os.path.basename(urlparse(url).path)).strip()
                        download_path = os.path.abspath(os.path.join(self.downloads_folder, fname))

                        suffix = 0
                        while os.path.exists(download_path) and suffix < 1000:
                            suffix += 1
                            base, ext = os.path.splitext(fname)
                            new_fname = f"{base}__{suffix}{ext}"
                            download_path = os.path.abspath(os.path.join(self.downloads_folder, new_fname))

                    except NameError:
                        pass

                    # No suitable name, so make one
                    if fname is None:
                        extension = mimetypes.guess_extension(content_type)
                        if extension is None:
                            extension = ".download"
                        fname = str(uuid.uuid4()) + extension
                        download_path = os.path.abspath(os.path.join(self.downloads_folder, fname))

                    # Open a file for writing
                    with open(download_path, "wb") as fh:
                        for chunk in response.iter_content(chunk_size=512):
                            fh.write(chunk)

                    # Render it
                    local_uri = pathlib.Path(download_path).as_uri()
                    self.set_address(local_uri)

        except UnsupportedFormatException as e:
            print(e)
            self.page_title = "Download complete."
            self._set_page_content(f"# Download complete\n\nSaved file to '{download_path}'")
        except FileConversionException as e:
            print(e)
            self.page_title = "Download complete."
            self._set_page_content(f"# Download complete\n\nSaved file to '{download_path}'")
        except FileNotFoundError:
            self.page_title = "Error 404"
            self._set_page_content(f"## Error 404\n\nFile not found: {download_path}")
        except requests.exceptions.RequestException as request_exception:
            try:
                self.page_title = f"Error {response.status_code}"

                # If the error was rendered in HTML we might as well render it
                content_type = response.headers.get("content-type", "")
                if content_type is not None and "text/html" in content_type.lower():
                    res = self._mdconvert.convert(response)
                    self.page_title = f"Error {response.status_code}"
                    self._set_page_content(f"## Error {response.status_code}\n\n{res.text_content}")
                else:
                    text = ""
                    for chunk in response.iter_content(chunk_size=512, decode_unicode=True):
                        text += chunk
                    self.page_title = f"Error {response.status_code}"
                    self._set_page_content(f"## Error {response.status_code}\n\n{text}")
            except NameError:
                self.page_title = "Error"
                self._set_page_content(f"## Error\n\n{str(request_exception)}")
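SimpleTextBrowser is the engine behind the web_surfer tools imported in app (10).py: "google:" addresses are routed to the DDGS-backed search, everything else is fetched and converted to Markdown, and the result is read through a paging viewport. A minimal sketch of driving it by hand, assuming the class above is importable and network access is available (the query is illustrative):

# Sketch: exercise SimpleTextBrowser directly, outside the tool wrappers.
browser = SimpleTextBrowser(viewport_size=1024 * 8, downloads_folder=".", request_kwargs={})

# A "google:" address triggers the DDGS search path and renders a results page.
print(browser.visit_page("google: rwkv ggml quantization"))

# Page through the current document and search within it.
browser.page_down()
print(browser.address, browser.page_title)
match = browser.find_on_page("quantization")
if match is not None:
    print(match)  # the first viewport containing the query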
chat_with_bot (2).py
ADDED
@@ -0,0 +1,278 @@
# Provides terminal-based chat interface for RWKV model.
# Usage: python chat_with_bot.py C:\rwkv.cpp-169M.bin
# Prompts and code adapted from https://github.com/BlinkDL/ChatRWKV/blob/9ca4cdba90efaee25cfec21a0bae72cbd48d8acd/chat.py

import os
import argparse
import pathlib
import copy
import json
import time
import sampling
from rwkv_cpp import rwkv_cpp_shared_library, rwkv_cpp_model
from tokenizer_util import add_tokenizer_argument, get_tokenizer
from typing import List, Dict, Optional

# ======================================== Script settings ========================================

# English, Chinese, Japanese
LANGUAGE: str = 'English'
# QA: Question and Answer prompt to talk to an AI assistant.
# Chat: chat prompt (need a large model for adequate quality, 7B+).
PROMPT_TYPE: str = 'QA'

MAX_GENERATION_LENGTH: int = 250

# Sampling temperature. It could be a good idea to increase temperature when top_p is low.
TEMPERATURE: float = 0.8
# For better Q&A accuracy and less diversity, reduce top_p (to 0.5, 0.2, 0.1 etc.)
TOP_P: float = 0.5
# Penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
PRESENCE_PENALTY: float = 0.2
# Penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
FREQUENCY_PENALTY: float = 0.2

END_OF_LINE_TOKEN: int = 187
DOUBLE_END_OF_LINE_TOKEN: int = 535
END_OF_TEXT_TOKEN: int = 0

# =================================================================================================

parser = argparse.ArgumentParser(description='Provide terminal-based chat interface for RWKV model')
parser.add_argument('model_path', help='Path to RWKV model in ggml format')
add_tokenizer_argument(parser)
args = parser.parse_args()

script_dir: pathlib.Path = pathlib.Path(os.path.abspath(__file__)).parent

with open(script_dir / 'prompt' / f'{LANGUAGE}-{PROMPT_TYPE}.json', 'r', encoding='utf8') as json_file:
    prompt_data = json.load(json_file)

user, bot, separator, init_prompt = prompt_data['user'], prompt_data['bot'], prompt_data['separator'], prompt_data['prompt']

if init_prompt == '':
    raise ValueError('Prompt must not be empty')

library = rwkv_cpp_shared_library.load_rwkv_shared_library()
print(f'System info: {library.rwkv_get_system_info_string()}')

print('Loading RWKV model')
model = rwkv_cpp_model.RWKVModel(library, args.model_path)

tokenizer_decode, tokenizer_encode = get_tokenizer(args.tokenizer, model.n_vocab)

# =================================================================================================

processed_tokens: List[int] = []
logits: Optional[rwkv_cpp_model.NumpyArrayOrPyTorchTensor] = None
state: Optional[rwkv_cpp_model.NumpyArrayOrPyTorchTensor] = None

def process_tokens(_tokens: List[int], new_line_logit_bias: float = 0.0) -> None:
    global processed_tokens, logits, state

    logits, state = model.eval_sequence_in_chunks(_tokens, state, state, logits, use_numpy=True)

    processed_tokens += _tokens

    logits[END_OF_LINE_TOKEN] += new_line_logit_bias

state_by_thread: Dict[str, Dict] = {}

def save_thread_state(_thread: str) -> None:
    state_by_thread[_thread] = {
        'tokens': copy.deepcopy(processed_tokens),
        'logits': copy.deepcopy(logits),
        'state': copy.deepcopy(state)
    }

def load_thread_state(_thread: str) -> None:
    global processed_tokens, logits, state

    thread_state = state_by_thread[_thread]

    processed_tokens = copy.deepcopy(thread_state['tokens'])
    logits = copy.deepcopy(thread_state['logits'])
    state = copy.deepcopy(thread_state['state'])

# Model only saw '\n\n' as [187, 187] before, but the tokenizer outputs [535] for it at the end.
# See https://github.com/BlinkDL/ChatRWKV/pull/110/files
def split_last_end_of_line(tokens: List[int]) -> List[int]:
    if len(tokens) > 0 and tokens[-1] == DOUBLE_END_OF_LINE_TOKEN:
        tokens = tokens[:-1] + [END_OF_LINE_TOKEN, END_OF_LINE_TOKEN]

    return tokens

# =================================================================================================

processing_start: float = time.time()

prompt_tokens = tokenizer_encode(init_prompt)
prompt_token_count = len(prompt_tokens)
print(f'Processing {prompt_token_count} prompt tokens, may take a while')

process_tokens(split_last_end_of_line(prompt_tokens))

processing_duration: float = time.time() - processing_start

print(f'Processed in {int(processing_duration)} s, {int(processing_duration / prompt_token_count * 1000)} ms per token')

save_thread_state('chat_init')
save_thread_state('chat')

print(f'\nChat initialized! Your name is {user}. Write something and press Enter. Use \\n to add line breaks to your message.')

while True:
    # Read user input
    user_input: str = input(f'> {user}{separator} ')
    msg: str = user_input.replace('\\n', '\n').strip()

    temperature: float = TEMPERATURE
    top_p: float = TOP_P

    if '-temp=' in msg:
        temperature = float(msg.split('-temp=')[1].split(' ')[0])

        msg = msg.replace('-temp=' + f'{temperature:g}', '')

        if temperature <= 0.2:
            temperature = 0.2

        if temperature >= 5:
            temperature = 5

    if '-top_p=' in msg:
        top_p = float(msg.split('-top_p=')[1].split(' ')[0])

        msg = msg.replace('-top_p=' + f'{top_p:g}', '')

        if top_p <= 0:
            top_p = 0

    msg = msg.strip()

    # + reset --> reset chat
    if msg == '+reset':
        load_thread_state('chat_init')
        save_thread_state('chat')
        print(f'{bot}{separator} Chat reset.\n')
        continue
    elif msg[:5].lower() == '+gen ' or msg[:3].lower() == '+i ' or msg[:4].lower() == '+qa ' or msg[:4].lower() == '+qq ' or msg.lower() == '+++' or msg.lower() == '++':

        # +gen YOUR PROMPT --> free single-round generation with any prompt. Requires Novel model.
        if msg[:5].lower() == '+gen ':
            new = '\n' + msg[5:].strip()
            state = None
            processed_tokens = []
            process_tokens(tokenizer_encode(new))
            save_thread_state('gen_0')

        # +i YOUR INSTRUCT --> free single-round generation with any instruct. Requires Raven model.
        elif msg[:3].lower() == '+i ':
            new = f'''
Below is an instruction that describes a task. Write a response that appropriately completes the request.

# Instruction:
{msg[3:].strip()}

# Response:
'''
            state = None
            processed_tokens = []
            process_tokens(tokenizer_encode(new))
            save_thread_state('gen_0')

        # +qq YOUR QUESTION --> answer an independent question with more creativity (regardless of context).
        elif msg[:4].lower() == '+qq ':
            new = '\nQ: ' + msg[4:].strip() + '\nA:'
            state = None
            processed_tokens = []
            process_tokens(tokenizer_encode(new))
            save_thread_state('gen_0')

        # +qa YOUR QUESTION --> answer an independent question (regardless of context).
        elif msg[:4].lower() == '+qa ':
            load_thread_state('chat_init')

            real_msg = msg[4:].strip()
            new = f'{user}{separator} {real_msg}\n\n{bot}{separator}'

            process_tokens(tokenizer_encode(new))
            save_thread_state('gen_0')

        # +++ --> continue last free generation (only for +gen / +i)
        elif msg.lower() == '+++':
            try:
                load_thread_state('gen_1')
                save_thread_state('gen_0')
            except Exception as e:
                print(e)
                continue

        # ++ --> retry last free generation (only for +gen / +i)
        elif msg.lower() == '++':
            try:
                load_thread_state('gen_0')
            except Exception as e:
                print(e)
                continue

        thread = 'gen_1'
    else:
        # + --> alternate chat reply
        if msg.lower() == '+':
            try:
                load_thread_state('chat_pre')
            except Exception as e:
                print(e)
                continue
        # chat with bot
        else:
            load_thread_state('chat')
            new = f'{user}{separator} {msg}\n\n{bot}{separator}'
            process_tokens(tokenizer_encode(new), new_line_logit_bias=-999999999)
            save_thread_state('chat_pre')

        thread = 'chat'

    # Print bot response
    print(f'> {bot}{separator}', end='')

    start_index: int = len(processed_tokens)
    accumulated_tokens: List[int] = []
    token_counts: Dict[int, int] = {}

    for i in range(MAX_GENERATION_LENGTH):
        for n in token_counts:
            logits[n] -= PRESENCE_PENALTY + token_counts[n] * FREQUENCY_PENALTY

        token: int = sampling.sample_logits(logits, temperature, top_p)

        if token == END_OF_TEXT_TOKEN:
            print()
            break

        if token not in token_counts:
            token_counts[token] = 1
        else:
            token_counts[token] += 1

        process_tokens([token])

        # Avoid UTF-8 display issues
        accumulated_tokens += [token]

        decoded: str = tokenizer_decode(accumulated_tokens)

        if '\uFFFD' not in decoded:
            print(decoded, end='', flush=True)

            accumulated_tokens = []

        if thread == 'chat':
            if '\n\n' in tokenizer_decode(processed_tokens[start_index:]):
                break

        if i == MAX_GENERATION_LENGTH - 1:
            print()

    save_thread_state(thread)
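The generation loop above applies two repetition controls before each sample: every token emitted so far is penalized a flat PRESENCE_PENALTY plus FREQUENCY_PENALTY scaled by how often it has appeared. A worked sketch of that adjustment on toy logits (numpy assumed; values illustrative):

import numpy as np

PRESENCE_PENALTY = 0.2
FREQUENCY_PENALTY = 0.2

logits = np.array([1.0, 2.0, 3.0, 4.0])
token_counts = {2: 3, 3: 1}  # token 2 sampled three times so far, token 3 once

# Same update as the chat loop: flat presence term plus count-scaled frequency term.
for token, count in token_counts.items():
    logits[token] -= PRESENCE_PENALTY + count * FREQUENCY_PENALTY

# token 2: 3.0 - (0.2 + 3 * 0.2) = 2.2; token 3: 4.0 - (0.2 + 1 * 0.2) = 3.6
print(logits)  # [1.  2.  2.2 3.6]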
cookies (2).py
ADDED
@@ -0,0 +1,713 @@
COOKIES_LIST = [
    {
        "domain": ".youtube.com",
        "expirationDate": 1718884961,
        "hostOnly": False,
        "httpOnly": False,
        "name": "ST-xuwub9",
        "path": "/",
        "sameSite": None,
        "secure": False,
        "session": False,
        "storeId": None,
        "value": "session_logininfo=AFmmF2swRAIgf4gadACOuWOcipI1anW-dakEjtidNLkufnOC8uml7EECIDh2YisqWELDBJPTGUysCucJ3I0wjXxYjVHro1LHrdW0%3AQUQ3MjNmd2Jiajl3OWZYRnpFNnZlWWV5ZGJWZ0hpcmp4LVVPU280bk4zOS03Z0ozZG9fOFhWZ0dXaVo3NG1wTEg1b3hGaG10TFBlaFBnTlJfbER5bEp0aFhoNS1OLVhYNFRZT2F6ajgzOFpDbGhlUjZpMWRETlFFRjFfTTRiM0RnNTROSkdmMTFMVjFic1VuZ2trbGp4aktDa0JJUC1BWDh3"
    },
    {
        "domain": ".youtube.com",
        "expirationDate": 1753004444.745411,
        "hostOnly": False,
        "httpOnly": True,
        "name": "__Secure-YEC",
        "path": "/",
        "sameSite": "lax",
        "secure": True,
        "session": False,
        "storeId": None,
        "value": "CgtRVnI5LW1zRHlQVSjbtNCzBjIhCgJGUhIbEhcSFRMLFBUWFwwYGRobHB0eHw4PIBAREiAk"
    },
    {
        "domain": ".youtube.com",
        "expirationDate": 1753434620.050824,
        "hostOnly": False,
        "httpOnly": True,
        "name": "__Secure-3PSID",
        "path": "/",
        "sameSite": "no_restriction",
        "secure": True,
        "session": False,
        "storeId": None,
        "value": "g.a000kwibeLUu8Ea9Y-vLun7u3kU5VNJVuMAZl_jdfJaNm50JyDBB4ezJ_bdWu46a7YwObVn44wACgYKAakSARQSFQHGX2MicJcTzecTKH6bHzqU6TMbTxoVAUF8yKqQYK-MoI6Ql3vI2oYTB3E-0076"
    },
    {
        "domain": ".youtube.com",
        "expirationDate": 1750420959.974642,
        "hostOnly": False,
        "httpOnly": False,
        "name": "SIDCC",
        "path": "/",
        "sameSite": None,
        "secure": False,
        "session": False,
        "storeId": None,
        "value": "AKEyXzWQZauHKOo8t87zoEcjaVNIYUX54ohoWXT-tX4aAhEuZzIIptxZAcNkHuG2oDXYL6t-lw"
    },
    {
        "domain": ".youtube.com",
        "expirationDate": 1753434620.050652,
        "hostOnly": False,
        "httpOnly": False,
        "name": "SID",
        "path": "/",
        "sameSite": None,
        "secure": False,
        "session": False,
        "storeId": None,
        "value": "g.a000kwibeLUu8Ea9Y-vLun7u3kU5VNJVuMAZl_jdfJaNm50JyDBB6VHrZcC3gBAsFPbCQ0gF5AACgYKAYkSARQSFQHGX2Mi9kt0gHg5CxCYSkLQGHWaeBoVAUF8yKre_V6r3jZVak6JV4o2Q0FL0076"
    },
    {
        "domain": ".youtube.com",
        "expirationDate": 1750420958.397534,
        "hostOnly": False,
        "httpOnly": True,
        "name": "__Secure-1PSIDTS",
        "path": "/",
        "sameSite": None,
        "secure": True,
        "session": False,
        "storeId": None,
        "value": "sidts-CjIB3EgAEkYL2L-GfrEzW5Dfy62S9oefGNLgst78S_986htCnGcfkxECch_9oz-qytSsZBAA"
    },
    {
        "domain": ".youtube.com",
        "expirationDate": 1753433494.44729,
        "hostOnly": False,
        "httpOnly": False,
        "name": "_ga_M0180HEFCY",
        "path": "/",
        "sameSite": None,
        "secure": False,
        "session": False,
        "storeId": None,
        "value": "GS1.1.1718871908.1.0.1718873494.0.0.0"
    },
    {
        "domain": ".youtube.com",
        "expirationDate": 1753434620.050933,
        "hostOnly": False,
        "httpOnly": False,
        "name": "SAPISID",
        "path": "/",
        "sameSite": None,
        "secure": True,
        "session": False,
        "storeId": None,
        "value": "mfeuiC-HraNJ-A03/ASXvCPNJSw7yTFgd6"
    },
    {
        "domain": ".youtube.com",
        "expirationDate": 1750420959.974764,
        "hostOnly": False,
        "httpOnly": True,
        "name": "__Secure-1PSIDCC",
        "path": "/",
        "sameSite": None,
        "secure": True,
        "session": False,
        "storeId": None,
        "value": "AKEyXzWHDSoXGCZpZhPxRrnC7B1s8zGIUjeMVyvgtQfsm1fs92lXPtFEI_td9LBUyqVUe0xK"
    },
    {
        "domain": ".youtube.com",
        "expirationDate": 1753434620.050881,
        "hostOnly": False,
        "httpOnly": True,
        "name": "SSID",
        "path": "/",
        "sameSite": None,
        "secure": True,
        "session": False,
        "storeId": None,
        "value": "AmlwXHnQvOQ10LVd-"
    },
    {
        "domain": ".youtube.com",
        "expirationDate": 1753434620.050959,
        "hostOnly": False,
        "httpOnly": False,
        "name": "__Secure-1PAPISID",
        "path": "/",
        "sameSite": None,
        "secure": True,
        "session": False,
        "storeId": None,
        "value": "mfeuiC-HraNJ-A03/ASXvCPNJSw7yTFgd6"
    },
    {
        "domain": ".youtube.com",
        "expirationDate": 1753434620.050795,
        "hostOnly": False,
        "httpOnly": True,
        "name": "__Secure-1PSID",
        "path": "/",
        "sameSite": None,
        "secure": True,
        "session": False,
        "storeId": None,
        "value": "g.a000kwibeLUu8Ea9Y-vLun7u3kU5VNJVuMAZl_jdfJaNm50JyDBBrlk7lRpKQGywAHEon7WGQAACgYKAQsSARQSFQHGX2MirAmnSRdZl6GPG6KLd4hOihoVAUF8yKoV17Tcj1a_OenIOkf2wBjO0076"
    },
    {
        "domain": ".youtube.com",
        "expirationDate": 1753434620.050993,
        "hostOnly": False,
        "httpOnly": False,
        "name": "__Secure-3PAPISID",
        "path": "/",
        "sameSite": "no_restriction",
        "secure": True,
        "session": False,
        "storeId": None,
        "value": "mfeuiC-HraNJ-A03/ASXvCPNJSw7yTFgd6"
    },
    {
        "domain": ".youtube.com",
        "expirationDate": 1750420959.974815,
        "hostOnly": False,
        "httpOnly": True,
        "name": "__Secure-3PSIDCC",
        "path": "/",
        "sameSite": "no_restriction",
        "secure": True,
        "session": False,
        "storeId": None,
        "value": "AKEyXzXM5UjKUEXwSHVmRAIo6hGHA4G63adj3EE1VdNriD0f38jZQbsUKiD4LQbA3BValmTFDg"
    },
    {
        "domain": ".youtube.com",
        "expirationDate": 1750420958.397647,
        "hostOnly": False,
        "httpOnly": True,
        "name": "__Secure-3PSIDTS",
        "path": "/",
        "sameSite": "no_restriction",
        "secure": True,
        "session": False,
        "storeId": None,
        "value": "sidts-CjIB3EgAEkYL2L-GfrEzW5Dfy62S9oefGNLgst78S_986htCnGcfkxECch_9oz-qytSsZBAA"
    },
    {
        "domain": ".youtube.com",
        "expirationDate": 1753434620.050908,
        "hostOnly": False,
        "httpOnly": False,
        "name": "APISID",
        "path": "/",
        "sameSite": None,
        "secure": False,
        "session": False,
        "storeId": None,
        "value": "IlQWLPjdNqziwCrV/ANG7Z4x5FF-IBxbZk"
    },
    {
        "domain": ".youtube.com",
        "expirationDate": 1753434620.050855,
        "hostOnly": False,
        "httpOnly": True,
        "name": "HSID",
        "path": "/",
        "sameSite": None,
        "secure": False,
        "session": False,
        "storeId": None,
        "value": "AasA7hmRuTFv7vjoq"
    },
    {
        "domain": ".youtube.com",
        "expirationDate": 1753435873.577793,
        "hostOnly": False,
        "httpOnly": True,
        "name": "LOGIN_INFO",
        "path": "/",
        "sameSite": "no_restriction",
        "secure": True,
        "session": False,
        "storeId": None,
        "value": "AFmmF2swRAIgf4gadACOuWOcipI1anW-dakEjtidNLkufnOC8uml7EECIDh2YisqWELDBJPTGUysCucJ3I0wjXxYjVHro1LHrdW0:QUQ3MjNmd2Jiajl3OWZYRnpFNnZlWWV5ZGJWZ0hpcmp4LVVPU280bk4zOS03Z0ozZG9fOFhWZ0dXaVo3NG1wTEg1b3hGaG10TFBlaFBnTlJfbER5bEp0aFhoNS1OLVhYNFRZT2F6ajgzOFpDbGhlUjZpMWRETlFFRjFfTTRiM0RnNTROSkdmMTFMVjFic1VuZ2trbGp4aktDa0JJUC1BWDh3"
    },
    {
        "domain": ".youtube.com",
        "expirationDate": 1753444956.555608,
        "hostOnly": False,
        "httpOnly": False,
        "name": "PREF",
        "path": "/",
        "sameSite": None,
        "secure": True,
        "session": False,
        "storeId": None,
        "value": "f4=4000000&f6=40000000&tz=Europe.Paris&f5=30000&f7=100"
    }
]

COOKIES_LIST += [
    {
        "domain": ".www.researchgate.net",
        "hostOnly": False,
        "httpOnly": True,
        "name": "isInstIp",
        "path": "/",
        "sameSite": None,
        "secure": True,
        "session": True,
        "storeId": None,
        "value": "False"
    },
    {
        "domain": ".researchgate.net",
        "expirationDate": 1734423981,
        "hostOnly": False,
        "httpOnly": False,
        "name": "__eoi",
        "path": "/",
        "sameSite": None,
        "secure": False,
        "session": False,
        "storeId": None,
        "value": "ID=c26f752377373146:T=1718871981:RT=1718884914:S=AA-AfjZw-T_OOX2kW2LLaFzXImgc"
    },
    {
        "domain": ".www.researchgate.net",
        "expirationDate": 1753444909.646103,
        "hostOnly": False,
        "httpOnly": True,
        "name": "ptc",
        "path": "/",
        "sameSite": None,
        "secure": True,
        "session": False,
        "storeId": None,
        "value": "RG1.8947708639250500550.1718872043"
    },
    {
        "domain": ".researchgate.net",
        "expirationDate": 1750507578,
        "hostOnly": False,
        "httpOnly": False,
        "name": "euconsent-v2-didomi",
        "path": "/",
        "sameSite": "lax",
        "secure": True,
        "session": False,
        "storeId": None,
        "value": "CQAgmoAQAgmoAAHABBENA5EsAP_gAEPgAAYgJ2pB5G5UTWlBIG53YMskIAUFhFBoQEAgAACAAwIBSBIAIIwEAGAAIAgAICACAAIAIBIAIABAGAAAAAAAYIAAIAAIAAAQIAAKIAAAAAAAAgBQAAgIAgggEAAAgEBEABAAgAAAEIIAQNgACgAAACCAAAAAAAABAAAAAAAAQAAAAAAAYCQAAAJIAAAAACAIABAIAAAAAAAAAAAAAAAABBAAIJ2wPIAFAAXABQAFQALgAcAA8ACAAEgALwAZAA0ACIAEcAJgAUgAqgBcADEAGgAPQAfgBEACOAE4AMMAZYA0QBsgDkAHOAO4AfsBBwEIAItARwBHQC6gHUAO2Ae0A_4CHQEXgJ2AUOAo8BT4CpQFqALYAXmAwQBkgDLAGXANjAhCBG8CbAE3gJ1gTtAA.f_wACHwAAAAA"
    },
    {
        "domain": ".researchgate.net",
        "expirationDate": 1718885236,
        "hostOnly": False,
        "httpOnly": False,
        "name": "_gat",
        "path": "/",
        "sameSite": None,
        "secure": False,
        "session": False,
        "storeId": None,
        "value": "1"
    },
    {
        "domain": "www.researchgate.net",
        "expirationDate": 1721477183,
        "hostOnly": True,
        "httpOnly": False,
        "name": "_pbjs_userid_consent_data",
        "path": "/",
        "sameSite": "lax",
        "secure": False,
        "session": False,
        "storeId": None,
        "value": "3524755945110770"
    },
    {
        "domain": ".researchgate.net",
        "expirationDate": 1752567981,
        "hostOnly": False,
        "httpOnly": False,
        "name": "__gads",
        "path": "/",
        "sameSite": None,
        "secure": False,
        "session": False,
        "storeId": None,
        "value": "ID=eca2adb88969c830:T=1718871981:RT=1718884914:S=ALNI_MY2qZchynrhWX6hWMlaI87Pcj9riQ"
    },
    {
        "domain": ".researchgate.net",
        "expirationDate": 1718886709.646173,
        "hostOnly": False,
        "httpOnly": True,
        "name": "__cf_bm",
        "path": "/",
        "sameSite": "no_restriction",
        "secure": True,
        "session": False,
        "storeId": None,
        "value": "IkQ_J4ciBzKQduRvjqsfSmQu8UygDWbHeROO5JVccfo-1718884909-1.0.1.1-qvNGEdbfI0HfhFP6kwe7R7mkTqODNhFuKhs72lLly6K2BOPMG3kbahpQFGvPK0U8FUfkznkq65gngd1sWj7sDA"
    },
    {
        "domain": ".researchgate.net",
        "expirationDate": 1752567981,
        "hostOnly": False,
        "httpOnly": False,
        "name": "__gpi",
        "path": "/",
        "sameSite": None,
        "secure": False,
        "session": False,
        "storeId": None,
        "value": "UID=00000e4e9aa2e6f2:T=1718871981:RT=1718884914:S=ALNI_MYFNrgzkKn7K6Bd2y8hC6GJCvDiSg"
    },
    {
        "domain": ".researchgate.net",
        "hostOnly": False,
        "httpOnly": True,
        "name": "_cfuvid",
        "path": "/",
        "sameSite": "no_restriction",
        "secure": True,
        "session": True,
        "storeId": None,
        "value": "_GPmGZkBymiH3UiqTqzakEpi98br3nfFUWC2_u_wqkc-1718884909785-0.0.1.1-604800000"
    },
    {
        "domain": ".researchgate.net",
        "expirationDate": 1753445177.271667,
        "hostOnly": False,
        "httpOnly": False,
        "name": "_ga",
        "path": "/",
        "sameSite": None,
        "secure": False,
        "session": False,
        "storeId": None,
        "value": "GA1.1.1525244793.1718885177"
    },
    {
        "domain": ".researchgate.net",
        "expirationDate": 1753445177.271482,
        "hostOnly": False,
        "httpOnly": False,
        "name": "_ga_4P31SJ70EJ",
        "path": "/",
        "sameSite": None,
        "secure": False,
        "session": False,
        "storeId": None,
        "value": "GS1.1.1718885177.1.0.1718885177.0.0.0"
    },
    {
        "domain": ".researchgate.net",
        "expirationDate": 1718971576,
        "hostOnly": False,
        "httpOnly": False,
        "name": "_gid",
        "path": "/",
        "sameSite": None,
        "secure": False,
        "session": False,
        "storeId": None,
        "value": "GA1.2.854907463.1718885177"
    },
    {
        "domain": ".www.researchgate.net",
        "expirationDate": 1750407982.506505,
        "hostOnly": False,
        "httpOnly": True,
        "name": "did",
        "path": "/",
        "sameSite": None,
        "secure": True,
        "session": False,
        "storeId": None,
        "value": "1dWLO3C6am8l667Q4VUlBo0O1LI49Qi2Vw21SJEXHavBDYT56DI9007W5rYGVFVH"
    },
    {
        "domain": ".researchgate.net",
        "expirationDate": 1750507578,
        "hostOnly": False,
        "httpOnly": False,
        "name": "didomi_token",
        "path": "/",
        "sameSite": "lax",
        "secure": True,
        "session": False,
        "storeId": None,
        "value": "eyJ1c2VyX2lkIjoiMTkwMzU4YTUtNWU2My02Y2UzLWJlNzAtZGFjNzVmYjdiY2ExIiwiY3JlYXRlZCI6IjIwMjQtMDYtMjBUMTI6MDY6MTYuODA2WiIsInVwZGF0ZWQiOiIyMDI0LTA2LTIwVDEyOjA2OjE4Ljc4MVoiLCJ2ZW5kb3JzIjp7ImVuYWJsZWQiOlsidHdpdHRlciIsImdvb2dsZSIsImM6bGlua2VkaW4tbWFya2V0aW5nLXNvbHV0aW9ucyIsImM6b3duZXJpcSIsImM6b21uaXR1cmUtYWRvYmUtYW5hbHl0aWNzIiwiYzp0ZWNobm9yYXRpLW1lZGlhIiwiYzppbnRlcmNvbSIsImM6aW50ZW50LWlxIiwiYzppcHJvbSIsImM6bGlua2VkaW4iLCJjOmFtYXpvbmFkdi16Y1hGTEI2WCIsImM6bWVkaWFuZXQtY1V3YUtFNnoiLCJjOmluZGV4ZXhjaC1OWkNRTTY4UCIsImM6emVvdGFwZ21iLWQ3YndtdGp3IiwiYzp0cmlwbGVsaWYtZGRKSDM0clkiLCJjOnJ0Ymhvd
XNlLWI4Y2RIOHRNIiwiYzptZHByaW1pcy1lYU4yOVdjUCIsImM6bG9vcG1lbGktVGRhWXRCUHEiLCJjOm1hZ25pdGVpbi05d1RZTHFSRCIsImM6Ymlkc3dpdGNoLWQ2N0V3N1c5IiwiYzpvcmFjbGVhZHYtcUhlREptQUwiLCJjOmdvb2dsZWFuYS00VFhuSmlnUiIsImM6bG90YW1lc29sLURIaTdMUmpNIiwiYzpuZXh0bWlsbGUtR0pyZlg4VWMiLCJjOm5yaWNodGVjLXFVVlEyUlFxIiwiYzpicml0ZXBvb2wtQldWeVdHeVUiLCJjOnRhcGFkaW5jLXFxY2tVN1BXIiwiYzppZDV0ZWNobi16Tk1KNGR3ZiIsImM6bWljcm9zb2Z0IiwiYzpwZXJtdXRpdmUtSjdpaHJlTWsiLCJjOm9wZXJhc29mdC1CY1hjRFZKTSIsImM6cG9zdGhvZy1Cakp4RmRGOSJdfSwicHVycG9zZXMiOnsiZW5hYmxlZCI6WyJnZW9sb2NhdGlvbl9kYXRhIiwiZGV2aWNlX2NoYXJhY3RlcmlzdGljcyJdfSwidmVuZG9yc19saSI6eyJlbmFibGVkIjpbImdvb2dsZSIsImM6b3BlcmFzb2Z0LUJjWGNEVkpNIl19LCJ2ZXJzaW9uIjoyLCJhYyI6IkRIU0FvQUZrQWNnQTVnSHFnUUhBeGdCNndEMTRJR0FRTkFqMEJJd0NTY0VyQUtCd1YtZ3MxQmgwREc0R09nQUEuREhTQW9BRmtBY2dBNWdIcWdRSEF4Z0I2d0QxNElHQVFOQWowQkl3Q1NjRXJBS0J3Vi1nczFCaDBERzRHT2dBQSJ9"
    },
    {
        "domain": ".www.researchgate.net",
        "hostOnly": False,
        "httpOnly": True,
        "name": "hasPdpNext",
        "path": "/",
        "sameSite": None,
        "secure": True,
        "session": True,
        "storeId": None,
        "value": "False"
    },
    {
        "domain": ".researchgate.net",
        "expirationDate": 1750421183,
        "hostOnly": False,
        "httpOnly": False,
        "name": "ph_phc_ma1XTQyee96N1GML6qUTgLQRiDifnRcE9STiHTZ0CfZ_posthog",
        "path": "/",
        "sameSite": "lax",
        "secure": True,
        "session": False,
        "storeId": None,
        "value": "%7B%22distinct_id%22%3A%220190358a-56a1-7313-83b0-d13dddeac787%22%2C%22%24sesid%22%3A%5B1718885183223%2C%220190358a-56a1-7313-83b0-d13b2b87778d%22%2C1718885176993%5D%2C%22%24session_is_sampled%22%3Atrue%7D"
    },
    {
        "domain": ".www.researchgate.net",
        "hostOnly": False,
        "httpOnly": True,
        "name": "sid",
        "path": "/",
        "sameSite": None,
        "secure": True,
        "session": True,
        "storeId": None,
        "value": "qmH5Lc4f0CUJ3zeaxORcV0S8I8V1MuCFZtcIQqPYtv1XPejrbSLAQRbT50PL40TqeKQ1XsQDWt9gtYVzuL80bRmPjw6jn3cQ0ikNqW40maHcQ3JL2Vfa8ZZf0j7p35eJ"
    }
]

COOKIES_LIST += [
    {
        "domain": "github.com",
        "hostOnly": True,
        "httpOnly": True,
        "name": "_gh_sess",
        "path": "/",
        "sameSite": "lax",
        "secure": True,
        "session": True,
        "storeId": None,
        "value": "P%2Fmof1avuqwHaUQUIJR%2FZYn7jqbT7lgGuTGjp1BGAFIG5UpNDusEE3b8dRjz0eATE5xPdPjLYFqMs%2FI9AOalKX4YuYfSEEnxCMawU01099b4o9Xzzcv%2BmecrmO0Q8q%2Bdq1h8SIv6nvPP7HzlFesl8ysafb9b%2F0q6dTArKdSOurasza8UgLSYD08ofA50Pcm0IG7CTzF8ZCizrGgGTMi%2F%2B7L3E17jav5PM1Sf2vQKg15Gbg1QIOppJJHzlufgQoZigqFv%2BWznaws0Tt7Y2lSFCw%3D%3D--CJRhqMXJnwOaJgk4--DhUErlL4GdROikEjKD4O9g%3D%3D"
    },
    {
        "domain": ".github.com",
        "expirationDate": 1750408875.763785,
        "hostOnly": False,
        "httpOnly": False,
        "name": "_octo",
        "path": "/",
        "sameSite": "lax",
        "secure": True,
        "session": False,
        "storeId": None,
        "value": "GH1.1.728652011.1718872875"
    },
    {
        "domain": ".github.com",
        "expirationDate": 1750408875.763926,
        "hostOnly": False,
        "httpOnly": True,
        "name": "logged_in",
        "path": "/",
        "sameSite": "lax",
        "secure": True,
        "session": False,
        "storeId": None,
        "value": "no"
    },
    {
        "domain": ".github.com",
        "hostOnly": False,
        "httpOnly": False,
        "name": "preferred_color_mode",
        "path": "/",
        "sameSite": "lax",
        "secure": True,
        "session": True,
        "storeId": None,
        "value": "dark"
    },
    {
        "domain": ".github.com",
        "hostOnly": False,
        "httpOnly": False,
        "name": "tz",
        "path": "/",
        "sameSite": "lax",
        "secure": True,
        "session": True,
        "storeId": None,
        "value": "Europe%2FParis"
    }
]

COOKIES_LIST += [
    {
        "domain": ".web.archive.org",
        "expirationDate": 1718886430,
        "hostOnly": False,
        "httpOnly": False,
        "name": "_gat",
        "path": "/web/20201123221659/http://orcid.org/",
        "sameSite": None,
        "secure": False,
        "session": False,
        "storeId": None,
        "value": "1"
    },
    {
        "domain": ".web.archive.org",
        "expirationDate": 1718972770,
        "hostOnly": False,
        "httpOnly": False,
        "name": "_gid",
        "path": "/web/20201123221659/http://orcid.org/",
        "sameSite": None,
        "secure": False,
        "session": False,
        "storeId": None,
        "value": "GA1.2.402246368.1606169825"
    },
    {
        "domain": ".web.archive.org",
        "expirationDate": 1753446370.315621,
        "hostOnly": False,
        "httpOnly": False,
        "name": "_ga",
        "path": "/web/20201123221659/http://orcid.org/",
        "sameSite": None,
        "secure": False,
        "session": False,
        "storeId": None,
        "value": "GA1.2.1301409987.1606169825"
    },
    {
        "domain": ".web.archive.org",
        "expirationDate": 1750422367,
        "hostOnly": False,
        "httpOnly": False,
        "name": "_hjid",
        "path": "/web/20201123221659/http://orcid.org/",
        "sameSite": "lax",
        "secure": False,
        "session": False,
        "storeId": None,
        "value": "07f80263-a631-4bf4-8ffd-8fc8912085e2"
    },
    {
        "domain": ".web.archive.org",
        "expirationDate": 1718888167,
        "hostOnly": False,
        "httpOnly": False,
        "name": "_hjFirstSeen",
        "path": "/web/20201123221659/http://orcid.org/",
        "sameSite": "lax",
        "secure": False,
        "session": False,
        "storeId": None,
        "value": "1"
    }
]
COOKIES_LIST += [
    {
        "domain": "orcid.org",
        "hostOnly": True,
        "httpOnly": False,
        "name": "AWSELBCORS",
        "path": "/",
        "sameSite": "no_restriction",
        "secure": True,
        "session": True,
        "storeId": None,
        "value": "CBD1D7FF1216388FA48838CBCA4774FD22800B8FB548A40EF92BB0994D5B77A8410307CDEAA69C52236663F2BF89B252C17BC0FCDF790FD59771BDDF6EA8CA4CFD29D8733F"
    },
    {
        "domain": ".orcid.org",
        "expirationDate": 1753452454.637671,
        "hostOnly": False,
        "httpOnly": False,
        "name": "_ga_9R61FWK9H5",
        "path": "/",
        "sameSite": None,
        "secure": False,
        "session": False,
        "storeId": None,
        "value": "GS1.1.1718892454.1.0.1718892454.0.0.0"
    },
    {
        "domain": ".orcid.org",
        "expirationDate": 1753452454.63421,
        "hostOnly": False,
        "httpOnly": False,
        "name": "_ga",
        "path": "/",
        "sameSite": None,
        "secure": False,
        "session": False,
        "storeId": None,
        "value": "GA1.1.2021310691.1718892455"
    },
    {
        "domain": "orcid.org",
        "hostOnly": True,
        "httpOnly": False,
        "name": "AWSELB",
|
660 |
+
"path": "/",
|
661 |
+
"sameSite": None,
|
662 |
+
"secure": False,
|
663 |
+
"session": True,
|
664 |
+
"storeId": None,
|
665 |
+
"value": "CBD1D7FF1216388FA48838CBCA4774FD22800B8FB548A40EF92BB0994D5B77A8410307CDEAA69C52236663F2BF89B252C17BC0FCDF790FD59771BDDF6EA8CA4CFD29D8733F"
|
666 |
+
},
|
667 |
+
{
|
668 |
+
"domain": ".orcid.org",
|
669 |
+
"expirationDate": 1750428454,
|
670 |
+
"hostOnly": False,
|
671 |
+
"httpOnly": False,
|
672 |
+
"name": "OptanonAlertBoxClosed",
|
673 |
+
"path": "/",
|
674 |
+
"sameSite": "lax",
|
675 |
+
"secure": False,
|
676 |
+
"session": False,
|
677 |
+
"storeId": None,
|
678 |
+
"value": "2024-06-20T14:07:34.583Z"
|
679 |
+
},
|
680 |
+
{
|
681 |
+
"domain": ".orcid.org",
|
682 |
+
"expirationDate": 1750428454,
|
683 |
+
"hostOnly": False,
|
684 |
+
"httpOnly": False,
|
685 |
+
"name": "OptanonConsent",
|
686 |
+
"path": "/",
|
687 |
+
"sameSite": "lax",
|
688 |
+
"secure": False,
|
689 |
+
"session": False,
|
690 |
+
"storeId": None,
|
691 |
+
"value": "isGpcEnabled=0&datestamp=Thu+Jun+20+2024+16%3A07%3A34+GMT%2B0200+(heure+d%E2%80%99%C3%A9t%C3%A9+d%E2%80%99Europe+centrale)&version=202310.2.0&browserGpcFlag=0&isIABGlobal=False&hosts=&landingPath=NotLandingPage&groups=C0001%3A1%2CC0003%3A1%2CC0002%3A1%2CC0004%3A1"
|
692 |
+
},
|
693 |
+
{
|
694 |
+
"domain": "orcid.org",
|
695 |
+
"hostOnly": True,
|
696 |
+
"httpOnly": False,
|
697 |
+
"name": "XSRF-TOKEN",
|
698 |
+
"path": "/",
|
699 |
+
"sameSite": None,
|
700 |
+
"secure": True,
|
701 |
+
"session": True,
|
702 |
+
"storeId": None,
|
703 |
+
"value": "6957be7a-bcb4-4d59-a522-ea9b6b210ed9"
|
704 |
+
}
|
705 |
+
]
|
706 |
+
from requests.cookies import RequestsCookieJar
|
707 |
+
|
708 |
+
# Create a RequestsCookieJar instance
|
709 |
+
COOKIES = RequestsCookieJar()
|
710 |
+
|
711 |
+
# Add cookies to the jar
|
712 |
+
for cookie in COOKIES_LIST:
|
713 |
+
COOKIES.set(cookie['name'], cookie['value'], domain=cookie['domain'], path=cookie['path'])
|
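For context, a jar assembled this way plugs straight into a requests session, so every request carries these cookies automatically. A minimal sketch, assuming the jar above was built successfully; the session object and example URL are illustrative and not part of the uploaded files:

import requests

# Hypothetical usage of the COOKIES jar built in cookies (2).py:
session = requests.Session()
session.cookies = COOKIES  # every request made through this session now sends the stored cookies
# response = session.get("https://github.com")  # example request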
llm_engine (2).py
ADDED
@@ -0,0 +1,179 @@
#!/usr/bin/env python
# coding=utf-8

# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os, time, json, re, gc, subprocess
import gradio as gr
import torch
import numpy as np
import argparse
import sampling
import copy
from datetime import datetime
from huggingface_hub import hf_hub_download, snapshot_download
from pynvml import *
from tokenizer_util import add_tokenizer_argument, get_tokenizer
import rwkv_world_tokenizer
# NOTE: '~' is not expanded here; both calls use the same literal path, so they stay consistent.
hf_hub_download(repo_id="JoPmt/RWKV-5-3B-V2-Quant", filename="rwkv-5-world-3b-v2-20231118-ctx16k.Q4_0.bin", local_dir='~/app/Downloads')
model_path = '~/app/Downloads/rwkv-5-world-3b-v2-20231118-ctx16k.Q4_0.bin'
from copy import deepcopy
from enum import Enum
from typing import Dict, List
from huggingface_hub import InferenceClient
from transformers.agents import PythonInterpreterTool
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained("NousResearch/Hermes-2-Pro-Llama-3-8B", revision="pr/13")
tools = [PythonInterpreterTool()]
os.system("apt-get update && apt-get install cmake gcc g++")
os.system("git clone --recursive https://github.com/JoPmt/rwkv.cpp.git && cd rwkv.cpp && mkdir build && cd build && cmake .. -DRWKV_CUBLAS=ON -DRWKV_BUILD_SHARED_LIBRARY=ON -DGGML_CUDA=ON -DRWKV_BUILD_PYTHON_MODULE=ON -DRWKV_BUILD_TOOLS=ON -DRWKV_BUILD_EXTRAS=ON && cmake --build . --config Release && make RWKV_CUBLAS=1 GGML_CUDA=1")
import rwkv_cpp_model
import rwkv_cpp_shared_library

def find_lib():
    # Locate the compiled librwkv.so anywhere on the filesystem.
    for root, dirs, files in os.walk("/"):
        for file in files:
            if file == "librwkv.so":
                return os.path.join(root, file)
    return None

library_path = find_lib()
rwkv_lib = rwkv_cpp_shared_library.RWKVSharedLibrary(library_path)
modal = rwkv_cpp_model.RWKVModel(rwkv_lib, model_path, thread_count=2)
print('Loading RWKV model')
tokenizer_decode, tokenizer_encode = get_tokenizer('auto', modal.n_vocab)
out_str = ''
prompt = out_str
token_count = 1200
temperature = 1.0
top_p = 0.7
presence_penalty = 0.1
count_penalty = 0.4

def generate_prompt(instruction, zput=""):
    instruction = instruction.strip().replace('\r\n', '\n').replace('\n\n', '\n')
    zput = zput.strip().replace('\r\n', '\n').replace('\n\n', '\n')
    if zput:
        return f"""Instruction: {instruction}
Input: {zput}
Response:"""
    else:
        return f"""User: hi
Assistant: Hi. I am your assistant and I will provide expert full response in full details. Please feel free to ask any question and I will always answer it.
User: {instruction}
Assistant:"""

class MessageRole(str, Enum):
    USER = "user"
    ASSISTANT = "assistant"
    SYSTEM = "system"
    TOOL_CALL = "tool-call"
    TOOL_RESPONSE = "tool-response"

    @classmethod
    def roles(cls):
        return [r.value for r in cls]

def get_clean_message_list(message_list: List[Dict[str, str]], role_conversions: Dict[str, str] = {}):
    """
    Subsequent messages with the same role will be concatenated to a single message.

    Args:
        message_list (`List[Dict[str, str]]`): List of chat messages.
    """
    final_message_list = []
    message_list = deepcopy(message_list)  # Avoid modifying the original list
    for message in message_list:
        if not set(message.keys()) == {"role", "content"}:
            raise ValueError("Message should contain only 'role' and 'content' keys!")

        role = message["role"]
        if role not in MessageRole.roles():
            raise ValueError(f"Incorrect role {role}, only {MessageRole.roles()} are supported for now.")

        if role in role_conversions:
            message["role"] = role_conversions[role]

        if len(final_message_list) > 0 and message["role"] == final_message_list[-1]["role"]:
            # Concatenate (rather than overwrite) consecutive same-role messages, as the docstring promises.
            final_message_list[-1]["content"] += "\n=======\n" + message["content"]
        else:
            final_message_list.append(message)
    return final_message_list

llama_role_conversions = {
    MessageRole.TOOL_RESPONSE: MessageRole.USER,
    MessageRole.TOOL_CALL: MessageRole.USER,
}

class HfEngine:
    def __init__(self, model: str = "JoPmt/JoPmt"):
        self.model = model
        self.client = modal

    def __call__(self, messages: List[Dict[str, str]], stop_sequences=[]) -> str:
        messages = get_clean_message_list(messages, role_conversions=llama_role_conversions)
        print(messages)
        pret = ''
        prut = ''
        for message in messages:
            print(message['content'])
            if message['role'].lower() == 'system':
                pret += '' + message['content'] + ''
            if message['role'].lower() == 'user':
                prut += '' + message['content'] + ''
        ##prompt = ins.format(question=''+pret+''+prut+'', system=pret)
        # NOTE: the chat-template prompt below is only printed for inspection;
        # generation actually uses generate_prompt(pret, prut) further down.
        prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True,)
        print(prompt)
        token_count = 1200
        temperature = 1.0
        top_p = 0.7
        presencePenalty = 0.1
        countPenalty = 0.4
        token_ban = []
        stop_token = [0]
        ctx = pret
        prompt = prut
        all_tokens = []
        out_last = 0
        out_str = ''
        occurrence = {}
        state = None
        ctx = generate_prompt(ctx, prompt)
        prompt_tokens = tokenizer_encode(ctx)
        prompt_token_count = len(prompt_tokens)
        # Feed the whole prompt through the model in chunks to obtain the initial logits and state.
        init_logits, init_state = modal.eval_sequence_in_chunks(prompt_tokens, None, None, None, use_numpy=True)
        logits, state = init_logits.copy(), init_state.copy()
        out_str = ''
        occurrence = {}
        bof = []
        for i in range(token_count):
            # Penalize tokens that have already occurred (presence + count penalties).
            for n in occurrence:
                logits[n] -= (presencePenalty + occurrence[n] * countPenalty)
            token = sampling.sample_logits(logits, temperature, top_p)

            if token in stop_token:
                break
            all_tokens += [token]

            # Decay the counters so old tokens are penalized less over time.
            for xxx in occurrence:
                occurrence[xxx] *= 0.996

            if token not in occurrence:
                occurrence[token] = 1
            else:
                occurrence[token] += 1

            tmp = tokenizer_decode(all_tokens[out_last:])
            if '\ufffd' not in tmp:  # only emit once a complete UTF-8 character is available
                out_str += tmp
                out_last = i + 1
            ##yield out_str.strip()
            logits, state = modal.eval(token, state, state, logits, use_numpy=True)
        del state
        gc.collect()
        return out_str.strip()
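A quick way to exercise the engine defined above — a sketch assuming the RWKV model and tokenizers loaded successfully; the message contents are made up for illustration:

# Hypothetical usage of HfEngine from llm_engine (2).py:
engine = HfEngine()
reply = engine([
    {"role": "system", "content": "You are a concise assistant."},
    {"role": "user", "content": "Explain what rwkv.cpp is in one sentence."},
])
print(reply)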
mdconvert (2).py
ADDED
@@ -0,0 +1,659 @@
# ruff: noqa: E722
import json
import os
import requests
import re
import markdownify
import mimetypes
import html
import puremagic
import tempfile
import copy
import mammoth
import pptx
import pandas as pd
import traceback

from urllib.parse import urlparse, parse_qs
from bs4 import BeautifulSoup
from typing import Any, Dict, List, Optional, Union
import pdfminer
import pdfminer.high_level
from youtube_transcript_api import YouTubeTranscriptApi


class DocumentConverterResult:
    """The result of converting a document to text."""

    def __init__(self, title: Union[str, None] = None, text_content: str = ""):
        self.title = title
        self.text_content = text_content


class DocumentConverter:
    def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]:
        raise NotImplementedError()


class PlainTextConverter(DocumentConverter):
    """Anything with content type text/plain"""

    def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]:
        extension = kwargs.get("file_extension", "")
        if extension == "":
            return None

        content_type, encoding = mimetypes.guess_type("__placeholder" + extension)

        text_content = ""
        with open(local_path, "rt") as fh:
            text_content = fh.read()

        return DocumentConverterResult(
            title=None,
            text_content=text_content,
        )


class HtmlConverter(DocumentConverter):
    """Anything with content type text/html"""

    def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]:
        # Bail if not html
        extension = kwargs.get("file_extension", "")
        if extension.lower() not in [".html", ".htm"]:
            return None

        result = None
        with open(local_path, "rt") as fh:
            result = self._convert(fh.read())

        return result

    def _convert(self, html_content) -> Union[None, DocumentConverterResult]:
        """Helper function that converts an HTML string."""

        # Parse the string
        soup = BeautifulSoup(html_content, "html.parser")

        # Remove javascript and style blocks
        for script in soup(["script", "style"]):
            script.extract()

        # Print only the main content
        body_elm = soup.find("body")
        webpage_text = ""
        if body_elm:
            webpage_text = markdownify.MarkdownConverter().convert_soup(body_elm)
        else:
            webpage_text = markdownify.MarkdownConverter().convert_soup(soup)

        return DocumentConverterResult(
            title=None if soup.title is None else soup.title.string,
            text_content=webpage_text,
        )


class WikipediaConverter(DocumentConverter):
    """Handle Wikipedia pages separately, focusing only on the main document content."""

    def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]:
        # Bail if not Wikipedia
        extension = kwargs.get("file_extension", "")
        if extension.lower() not in [".html", ".htm"]:
            return None
        url = kwargs.get("url", "")
        if not re.search(r"^https?:\/\/[a-zA-Z]{2,3}\.wikipedia.org\/", url):
            return None

        # Parse the file
        soup = None
        with open(local_path, "rt") as fh:
            soup = BeautifulSoup(fh.read(), "html.parser")

        # Remove javascript and style blocks
        for script in soup(["script", "style"]):
            script.extract()

        # Print only the main content
        body_elm = soup.find("div", {"id": "mw-content-text"})
        title_elm = soup.find("span", {"class": "mw-page-title-main"})

        webpage_text = ""
        if body_elm:
            # What's the title
            main_title = soup.title.string
            if title_elm and len(title_elm) > 0:
                main_title = title_elm.string

            # Convert the page
            webpage_text = "# " + main_title + "\n\n" + markdownify.MarkdownConverter().convert_soup(body_elm)
        else:
            webpage_text = markdownify.MarkdownConverter().convert_soup(soup)

        return DocumentConverterResult(
            title=soup.title.string,
            text_content=webpage_text,
        )


class YouTubeConverter(DocumentConverter):
    """Handle YouTube specially, focusing on the video title, description, and transcript."""

    def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]:
        # Bail if not YouTube
        extension = kwargs.get("file_extension", "")
        if extension.lower() not in [".html", ".htm"]:
            return None
        url = kwargs.get("url", "")
        if not url.startswith("https://www.youtube.com/watch?"):
            return None

        # Parse the file
        soup = None
        with open(local_path, "rt") as fh:
            soup = BeautifulSoup(fh.read(), "html.parser")

        # Read the meta tags
        metadata = {"title": soup.title.string}
        for meta in soup(["meta"]):
            for a in meta.attrs:
                if a in ["itemprop", "property", "name"]:
                    metadata[meta[a]] = meta.get("content", "")
                    break

        # We can also try to read the full description. This is more prone to breaking, since it reaches into the page implementation
        try:
            for script in soup(["script"]):
                content = script.text
                if "ytInitialData" in content:
                    lines = re.split(r"\r?\n", content)
                    obj_start = lines[0].find("{")
                    obj_end = lines[0].rfind("}")
                    if obj_start >= 0 and obj_end >= 0:
                        data = json.loads(lines[0][obj_start : obj_end + 1])
                        attrdesc = self._findKey(data, "attributedDescriptionBodyText")
                        if attrdesc:
                            metadata["description"] = attrdesc["content"]
                    break
        except:
            pass

        # Start preparing the page
        webpage_text = "# YouTube\n"

        title = self._get(metadata, ["title", "og:title", "name"])
        if title:
            webpage_text += f"\n## {title}\n"

        stats = ""
        views = self._get(metadata, ["interactionCount"])
        if views:
            stats += f"- **Views:** {views}\n"

        keywords = self._get(metadata, ["keywords"])
        if keywords:
            stats += f"- **Keywords:** {keywords}\n"

        runtime = self._get(metadata, ["duration"])
        if runtime:
            stats += f"- **Runtime:** {runtime}\n"

        if len(stats) > 0:
            webpage_text += f"\n### Video Metadata\n{stats}\n"

        description = self._get(metadata, ["description", "og:description"])
        if description:
            webpage_text += f"\n### Description\n{description}\n"

        transcript_text = ""
        parsed_url = urlparse(url)
        params = parse_qs(parsed_url.query)

        video_id = params["v"][0]
        # Must be a single transcript.
        print("Video ID:", video_id)
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        transcript_text = " ".join([part["text"] for part in transcript])
        # Alternative formatting:
        # formatter = TextFormatter()
        # formatter.format_transcript(transcript)
        if transcript_text:
            webpage_text += f"\n### Transcript\n{transcript_text}\n"

        return DocumentConverterResult(
            title=title if title else soup.title.string,
            text_content=webpage_text,
        )

    def _get(self, json, keys, default=None):
        for k in keys:
            if k in json:
                return json[k]
        return default

    def _findKey(self, json, key):
        if isinstance(json, list):
            for elm in json:
                ret = self._findKey(elm, key)
                if ret is not None:
                    return ret
        elif isinstance(json, dict):
            for k in json:
                if k == key:
                    return json[k]
                else:
                    ret = self._findKey(json[k], key)
                    if ret is not None:
                        return ret
        return None


class PdfConverter(DocumentConverter):
    def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]:
        # Bail if not a PDF
        extension = kwargs.get("file_extension", "")
        if extension.lower() != ".pdf":
            return None

        return DocumentConverterResult(
            title=None,
            text_content=pdfminer.high_level.extract_text(local_path),
        )

from huggingface_hub import InferenceClient

class AudioConverter(DocumentConverter):
    def __init__(self):
        super().__init__()
        self.client = InferenceClient("distil-whisper/distil-large-v3")

    def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]:
        # Bail if not an audio file
        extension = kwargs.get("file_extension", "")
        if extension.lower() not in [".wav", ".mp3", ".flac", ".m4a"]:
            return None
        try:
            result = self.client.automatic_speech_recognition(audio=local_path).text
        except Exception as e:
            print("Exception in decoding audio:", e)
            # Fall back to OpenAI Whisper if the Hugging Face endpoint fails.
            from openai import OpenAI
            oai_client = OpenAI()
            from pathlib import Path
            result = oai_client.audio.transcriptions.create(
                model="whisper-1",
                file=Path(local_path)
            ).text

        return DocumentConverterResult(
            title=None,
            text_content=result,
        )


class DocxConverter(HtmlConverter):
    def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]:
        # Bail if not a DOCX
        extension = kwargs.get("file_extension", "")
        if extension.lower() != ".docx":
            return None

        result = None
        with open(local_path, "rb") as docx_file:
            result = mammoth.convert_to_html(docx_file)
            html_content = result.value
            result = self._convert(html_content)

        return result


class XlsxConverter(HtmlConverter):
    def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]:
        # Bail if not a XLSX
        extension = kwargs.get("file_extension", "")

        if extension.lower() not in [".xlsx", ".xls"]:
            return None

        sheets = pd.read_excel(local_path, sheet_name=None)
        md_content = ""
        for s in sheets:
            md_content += f"## {s}\n"
            html_content = sheets[s].to_html(index=False)
            md_content += self._convert(html_content).text_content.strip() + "\n\n"

        return DocumentConverterResult(
            title=None,
            text_content=md_content.strip(),
        )


import xml.etree.ElementTree as ET

class XmlConverter(DocumentConverter):
    def convert(self, local_path, **kwargs) -> None | DocumentConverterResult:
        # Parse the XML string
        extension = kwargs.get("file_extension", "")

        if extension.lower() not in [".xml"]:
            return None

        xml_string = ""
        with open(local_path, "rt") as fh:
            xml_string = fh.read()

        def extract_table_from_html_like(xml_root):
            table = xml_root.find('.//table')
            if table is None:
                raise ValueError("No table found in the XML")

            headers = [th.text for th in table.find('thead').findall('th')]
            rows = [[td.text for td in tr.findall('td')] for tr in table.find('tbody').findall('tr')]

            # Create markdown table
            markdown = '| ' + ' | '.join(headers) + ' |\n'
            markdown += '| ' + ' | '.join(['---'] * len(headers)) + ' |\n'
            for row in rows:
                markdown += '| ' + ' | '.join(row) + ' |\n'
            return markdown  # this return was missing; without it the function yielded None

        def extract_table_from_wordml(xml_root, namespaces):
            # Parse the XML content
            root = xml_root
            namespace = {'w': 'http://schemas.microsoft.com/office/word/2003/wordml'}

            # Extract text content
            body = root.find('w:body', namespace)
            paragraphs = body.findall('.//w:p', namespace)
            text_content = []
            for para in paragraphs:
                texts = para.findall('.//w:t', namespace)
                for text in texts:
                    text_content.append(text.text)

            return '\n'.join(text_content)

        # Parse the XML string
        root = ET.fromstring(xml_string)
        namespaces = {'w': 'http://schemas.microsoft.com/office/word/2003/wordml'}

        if root.tag.endswith('wordDocument'):
            markdown = extract_table_from_wordml(root, namespaces)
        else:
            markdown = extract_table_from_html_like(root)

        return DocumentConverterResult(
            title=None,
            text_content=markdown.strip(),
        )

class PptxConverter(HtmlConverter):
    def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]:
        # Bail if not a PPTX
        extension = kwargs.get("file_extension", "")
        if extension.lower() != ".pptx":
            return None

        md_content = ""

        presentation = pptx.Presentation(local_path)
        slide_num = 0
        for slide in presentation.slides:
            slide_num += 1

            md_content += f"\n\n<!-- Slide number: {slide_num} -->\n"

            title = slide.shapes.title
            for shape in slide.shapes:
                # Pictures
                if self._is_picture(shape):
                    # https://github.com/scanny/python-pptx/pull/512#issuecomment-1713100069
                    alt_text = ""
                    try:
                        alt_text = shape._element._nvXxPr.cNvPr.attrib.get("descr", "")
                    except:
                        pass

                    # A placeholder name
                    filename = re.sub(r"\W", "", shape.name) + ".jpg"
                    # try:
                    #     filename = shape.image.filename
                    # except:
                    #     pass

                    md_content += "\n![" + (alt_text if alt_text else shape.name) + "](" + filename + ")\n"

                # Tables
                if self._is_table(shape):
                    html_table = "<html><body><table>"
                    first_row = True
                    for row in shape.table.rows:
                        html_table += "<tr>"
                        for cell in row.cells:
                            if first_row:
                                html_table += "<th>" + html.escape(cell.text) + "</th>"
                            else:
                                html_table += "<td>" + html.escape(cell.text) + "</td>"
                        html_table += "</tr>"
                        first_row = False
                    html_table += "</table></body></html>"
                    md_content += "\n" + self._convert(html_table).text_content.strip() + "\n"

                # Text areas
                elif shape.has_text_frame:
                    if shape == title:
                        md_content += "# " + shape.text.lstrip() + " "
                    else:
                        md_content += shape.text + " "

            md_content = md_content.strip()

            if slide.has_notes_slide:
                md_content += "\n\n### Notes:\n"
                notes_frame = slide.notes_slide.notes_text_frame
                if notes_frame is not None:
                    md_content += notes_frame.text
                md_content = md_content.strip()

        return DocumentConverterResult(
            title=None,
            text_content=md_content.strip(),
        )

    def _is_picture(self, shape):
        if shape.shape_type == pptx.enum.shapes.MSO_SHAPE_TYPE.PICTURE:
            return True
        if shape.shape_type == pptx.enum.shapes.MSO_SHAPE_TYPE.PLACEHOLDER:
            if hasattr(shape, "image"):
                return True
        return False

    def _is_table(self, shape):
        if shape.shape_type == pptx.enum.shapes.MSO_SHAPE_TYPE.TABLE:
            return True
        return False

class FileConversionException(Exception):
    pass

class UnsupportedFormatException(Exception):
    pass

class MarkdownConverter:
    """(In preview) An extremely simple text-based document reader, suitable for LLM use.
    This reader will convert common file-types or webpages to Markdown."""

    def __init__(
        self,
        requests_session: Optional[requests.Session] = None,
    ):
        if requests_session is None:
            self._requests_session = requests.Session()
        else:
            self._requests_session = requests_session

        self._page_converters: List[DocumentConverter] = []

        # Register converters for successful browsing operations
        # Later registrations are tried first / take higher priority than earlier registrations
        # To this end, the most specific converters should appear below the most generic converters
        self.register_page_converter(WikipediaConverter())
        self.register_page_converter(XmlConverter())
        self.register_page_converter(YouTubeConverter())
        self.register_page_converter(DocxConverter())
        self.register_page_converter(XlsxConverter())
        self.register_page_converter(PptxConverter())
        # self.register_page_converter(ImageConverter())
        self.register_page_converter(PdfConverter())
        self.register_page_converter(AudioConverter())
        self.register_page_converter(HtmlConverter())
        self.register_page_converter(PlainTextConverter())

    def convert(self, source, **kwargs):
        """
        Args:
            - source: can be a string representing a path or url, or a requests.response object
            - extension: specifies the file extension to use when interpreting the file. If None, infer from source (path, uri, content-type, etc.)
        """

        # Local path or url
        if isinstance(source, str):
            if source.startswith("http://") or source.startswith("https://") or source.startswith("file://"):
                return self.convert_url(source, **kwargs)
            else:
                return self.convert_local(source, **kwargs)
        # Request response
        elif isinstance(source, requests.Response):
            return self.convert_response(source, **kwargs)

    def convert_local(self, path, **kwargs):
        # Prepare a list of extensions to try (in order of priority)
        ext = kwargs.get("file_extension")
        extensions = [ext] if ext is not None else []

        # Get extension alternatives from the path and puremagic
        base, ext = os.path.splitext(path)
        self._append_ext(extensions, ext)
        self._append_ext(extensions, self._guess_ext_magic(path))

        # Convert
        return self._convert(path, extensions, **kwargs)

    def convert_url(self, url, **kwargs):
        # Send a HTTP request to the URL
        user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0"
        response = self._requests_session.get(url, stream=True, headers={"User-Agent": user_agent})
        response.raise_for_status()
        return self.convert_response(response, **kwargs)

    def convert_response(self, response, **kwargs):
        # Prepare a list of extensions to try (in order of priority)
        ext = kwargs.get("file_extension")
        extensions = [ext] if ext is not None else []

        # Guess from the mimetype
        content_type = response.headers.get("content-type", "").split(";")[0]
        self._append_ext(extensions, mimetypes.guess_extension(content_type))

        # Read the content disposition if there is one
        content_disposition = response.headers.get("content-disposition", "")
        m = re.search(r"filename=([^;]+)", content_disposition)
        if m:
            base, ext = os.path.splitext(m.group(1).strip("\"'"))
            self._append_ext(extensions, ext)

        # Read the extension from the path
        base, ext = os.path.splitext(urlparse(response.url).path)
        self._append_ext(extensions, ext)

        # Save the file locally to a temporary file. It will be deleted before this method exits
        handle, temp_path = tempfile.mkstemp()
        fh = os.fdopen(handle, "wb")
        result = None
        try:
            # Download the file
            for chunk in response.iter_content(chunk_size=512):
                fh.write(chunk)
            fh.close()

            # Use puremagic to check for more extension options
            self._append_ext(extensions, self._guess_ext_magic(temp_path))

            # Convert
            result = self._convert(temp_path, extensions, url=response.url)
        except Exception as e:
            print(f"Error in converting: {e}")

        # Clean up
        finally:
            try:
                fh.close()
            except:
                pass
            os.unlink(temp_path)

        return result

    def _convert(self, local_path, extensions, **kwargs):
        error_trace = ""
        for ext in extensions:
            for converter in self._page_converters:
                _kwargs = copy.deepcopy(kwargs)
                _kwargs.update({"file_extension": ext})
                # If we hit an error log it and keep trying
                try:
                    res = converter.convert(local_path, **_kwargs)
                    if res is not None:
                        # Normalize the content
                        res.text_content = "\n".join([line.rstrip() for line in re.split(r"\r?\n", res.text_content)])
                        res.text_content = re.sub(r"\n{3,}", "\n\n", res.text_content)

                        # Todo
                        return res
                except Exception as e:
                    error_trace = ("\n\n" + traceback.format_exc()).strip()

        # If we got this far without success, report any exceptions
        if len(error_trace) > 0:
            raise FileConversionException(
                f"Could not convert '{local_path}' to Markdown. File type was recognized as {extensions}. While converting the file, the following error was encountered:\n\n{error_trace}"
            )

        # Nothing can handle it!
        # raise UnsupportedFormatException(
        #     f"Could not convert '{local_path}' to Markdown. The formats {extensions} are not supported."
        # )
        res = PlainTextConverter().convert(local_path, **kwargs)
        return res

    def _append_ext(self, extensions, ext):
        """Append a unique non-None, non-empty extension to a list of extensions."""
        if ext is None:
            return
        ext = ext.strip()
        if ext == "":
            return
        # if ext not in extensions:
        extensions.append(ext)

    def _guess_ext_magic(self, path):
        """Use puremagic (a Python implementation of libmagic) to guess a file's extension based on the first few bytes."""
        # Use puremagic to guess
        try:
            guesses = puremagic.magic_file(path)
            if len(guesses) > 0:
                ext = guesses[0].extension.strip()
                if len(ext) > 0:
                    return ext
        except FileNotFoundError:
            pass
        except IsADirectoryError:
            pass
        except PermissionError:
            pass
        return None

    def register_page_converter(self, converter: DocumentConverter) -> None:
        """Register a page text converter."""
        self._page_converters.append(converter)
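A minimal sketch of driving the MarkdownConverter defined above; the URL is just an example, and convert() also accepts local paths and requests.Response objects:

# Hypothetical usage of MarkdownConverter from mdconvert (2).py:
mdconv = MarkdownConverter()
result = mdconv.convert("https://en.wikipedia.org/wiki/Markdown")
if result is not None:
    print(result.title)
    print(result.text_content[:500])  # first 500 characters of the Markdown output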
requirements (93).txt
ADDED
@@ -0,0 +1,32 @@
transformers==4.43.0
torch
gradio
huggingface_hub
beautifulsoup4
requests
gradio_tools
accelerate
langchain
sentence-transformers
faiss-cpu
langchain_community
langchain-huggingface
pypdf
markdownify
urllib3
pathvalidate
pdfminer.six
pdfminer
mammoth
python-pptx
pandas
puremagic
youtube_transcript_api
google-search-results
duckduckgo_search
cmake
numpy
pynvml
argparse
typing
tqdm
rwkv_cpp_model (2).py
ADDED
@@ -0,0 +1,388 @@
1 |
+
import os
|
2 |
+
import multiprocessing
|
3 |
+
|
4 |
+
# Pre-import PyTorch, if available.
|
5 |
+
# This fixes "OSError: [WinError 127] The specified procedure could not be found".
|
6 |
+
try:
|
7 |
+
import torch
|
8 |
+
except ModuleNotFoundError:
|
9 |
+
pass
|
10 |
+
|
11 |
+
# I'm sure this is not strictly correct, but let's keep this crutch for now.
|
12 |
+
try:
|
13 |
+
import rwkv_cpp_shared_library
|
14 |
+
except ModuleNotFoundError:
|
15 |
+
from . import rwkv_cpp_shared_library
|
16 |
+
|
17 |
+
from typing import TypeVar, Optional, Tuple, List
|
18 |
+
|
19 |
+
# A value of this type is either a numpy's ndarray or a PyTorch's Tensor.
|
20 |
+
NumpyArrayOrPyTorchTensor: TypeVar = TypeVar('NumpyArrayOrPyTorchTensor')
|
21 |
+
|
22 |
+
class RWKVModel:
|
23 |
+
"""
|
24 |
+
An RWKV model managed by rwkv.cpp library.
|
25 |
+
"""
|
26 |
+
|
27 |
+
def __init__(
|
28 |
+
self,
|
29 |
+
shared_library: rwkv_cpp_shared_library.RWKVSharedLibrary,
|
30 |
+
model_path: str,
|
31 |
+
thread_count: int = max(1, multiprocessing.cpu_count() // 2),
|
32 |
+
gpu_layer_count: int = 0,
|
33 |
+
**kwargs
|
34 |
+
) -> None:
|
35 |
+
"""
|
36 |
+
Loads the model and prepares it for inference.
|
37 |
+
In case of any error, this method will throw an exception.
|
38 |
+
|
39 |
+
Parameters
|
40 |
+
----------
|
41 |
+
shared_library : RWKVSharedLibrary
|
42 |
+
rwkv.cpp shared library.
|
43 |
+
model_path : str
|
44 |
+
Path to RWKV model file in ggml format.
|
45 |
+
thread_count : int
|
46 |
+
Thread count to use. If not set, defaults to CPU count / 2.
|
47 |
+
gpu_layer_count : int
|
48 |
+
Count of layers to offload onto the GPU, must be >= 0.
|
49 |
+
See documentation of `gpu_offload_layers` for details about layer offloading.
|
50 |
+
"""
|
51 |
+
|
52 |
+
if 'gpu_layers_count' in kwargs:
|
53 |
+
gpu_layer_count = kwargs['gpu_layers_count']
|
54 |
+
|
55 |
+
if not os.path.isfile(model_path):
|
56 |
+
raise ValueError(f'{model_path} is not a file')
|
57 |
+
|
58 |
+
if not (thread_count > 0):
|
59 |
+
raise ValueError('Thread count must be > 0')
|
60 |
+
|
61 |
+
if not (gpu_layer_count >= 0):
|
62 |
+
raise ValueError('GPU layer count must be >= 0')
|
63 |
+
|
64 |
+
self._library: rwkv_cpp_shared_library.RWKVSharedLibrary = shared_library
|
65 |
+
|
66 |
+
self._ctx: rwkv_cpp_shared_library.RWKVContext = self._library.rwkv_init_from_file(model_path, thread_count)
|
67 |
+
|
68 |
+
if gpu_layer_count > 0:
|
69 |
+
self.gpu_offload_layers(gpu_layer_count)
|
70 |
+
|
71 |
+
self._state_buffer_element_count: int = self._library.rwkv_get_state_buffer_element_count(self._ctx)
|
72 |
+
self._logits_buffer_element_count: int = self._library.rwkv_get_logits_buffer_element_count(self._ctx)
|
73 |
+
|
74 |
+
self._valid: bool = True
|
75 |
+
|
76 |
+
def gpu_offload_layers(self, layer_count: int) -> bool:
|
77 |
+
"""
|
78 |
+
Offloads specified count of model layers onto the GPU. Offloaded layers are evaluated using cuBLAS or CLBlast.
|
79 |
+
For the purposes of this function, model head (unembedding matrix) is treated as an additional layer:
|
80 |
+
- pass `model.n_layer` to offload all layers except model head
|
81 |
+
- pass `model.n_layer + 1` to offload all layers, including model head
|
82 |
+
|
83 |
+
Returns true if at least one layer was offloaded.
|
84 |
+
If rwkv.cpp was compiled without cuBLAS and CLBlast support, this function is a no-op and always returns false.
|
85 |
+
|
86 |
+
Parameters
|
87 |
+
----------
|
88 |
+
layer_count : int
|
89 |
+
Count of layers to offload onto the GPU, must be >= 0.
|
90 |
+
"""
|
91 |
+
|
92 |
+
if not (layer_count >= 0):
|
93 |
+
raise ValueError('Layer count must be >= 0')
|
94 |
+
|
95 |
+
return self._library.rwkv_gpu_offload_layers(self._ctx, layer_count)
|
96 |
+
|
97 |
+
@property
|
98 |
+
def n_vocab(self) -> int:
|
99 |
+
return self._library.rwkv_get_n_vocab(self._ctx)
|
100 |
+
|
101 |
+
@property
|
102 |
+
def n_embed(self) -> int:
|
103 |
+
return self._library.rwkv_get_n_embed(self._ctx)
|
104 |
+
|
105 |
+
@property
|
106 |
+
def n_layer(self) -> int:
|
107 |
+
return self._library.rwkv_get_n_layer(self._ctx)
|
108 |
+
|
109 |
+
def eval(
|
110 |
+
self,
|
111 |
+
token: int,
|
112 |
+
state_in: Optional[NumpyArrayOrPyTorchTensor],
|
113 |
+
state_out: Optional[NumpyArrayOrPyTorchTensor] = None,
|
114 |
+
logits_out: Optional[NumpyArrayOrPyTorchTensor] = None,
|
115 |
+
use_numpy: bool = False
|
116 |
+
) -> Tuple[NumpyArrayOrPyTorchTensor, NumpyArrayOrPyTorchTensor]:
|
117 |
+
"""
|
118 |
+
Evaluates the model for a single token.
|
119 |
+
In case of any error, this method will throw an exception.
|
120 |
+
|
121 |
+
Parameters
|
122 |
+
----------
|
123 |
+
token : int
|
124 |
+
Index of next token to be seen by the model. Must be in range 0 <= token < n_vocab.
|
125 |
+
state_in : Optional[NumpyArrayOrTorchTensor]
|
126 |
+
State from previous call of this method. If this is a first pass, set it to None.
|
127 |
+
state_out : Optional[NumpyArrayOrTorchTensor]
|
128 |
+
Optional output tensor for state. If provided, must be of type float32, contiguous and of shape (state_buffer_element_count).
|
129 |
+
logits_out : Optional[NumpyArrayOrTorchTensor]
|
130 |
+
Optional output tensor for logits. If provided, must be of type float32, contiguous and of shape (logits_buffer_element_count).
|
131 |
+
use_numpy : bool
|
132 |
+
If set to True, numpy's ndarrays will be created instead of PyTorch's Tensors.
|
133 |
+
This parameter is ignored if any tensor parameter is not None; in such case,
|
134 |
+
type of returned tensors will match the type of received tensors.
|
135 |
+
|
136 |
+
Returns
|
137 |
+
-------
|
138 |
+
logits, state
|
139 |
+
Logits vector of shape (n_vocab); state for the next step.
|
140 |
+
"""
|
141 |
+
|
142 |
+
if not self._valid:
|
143 |
+
raise ValueError('Model was freed')
|
144 |
+
|
145 |
+
use_numpy = self._detect_numpy_usage([state_in, state_out, logits_out], use_numpy)
|
146 |
+
|
147 |
+
if state_in is not None:
|
148 |
+
self._validate_tensor(state_in, 'state_in', self._state_buffer_element_count)
|
149 |
+
|
150 |
+
state_in_ptr = self._get_data_ptr(state_in)
|
151 |
+
else:
|
152 |
+
state_in_ptr = 0
|
153 |
+
|
154 |
+
if state_out is not None:
|
155 |
+
self._validate_tensor(state_out, 'state_out', self._state_buffer_element_count)
|
156 |
+
else:
|
157 |
+
state_out = self._zeros_float32(self._state_buffer_element_count, use_numpy)
|
158 |
+
|
159 |
+
if logits_out is not None:
|
160 |
+
self._validate_tensor(logits_out, 'logits_out', self._logits_buffer_element_count)
|
161 |
+
else:
|
162 |
+
logits_out = self._zeros_float32(self._logits_buffer_element_count, use_numpy)
|
163 |
+
|
164 |
+
self._library.rwkv_eval(
|
165 |
+
self._ctx,
|
166 |
+
token,
|
167 |
+
state_in_ptr,
|
168 |
+
self._get_data_ptr(state_out),
|
169 |
+
self._get_data_ptr(logits_out)
|
170 |
+
)
|
171 |
+
|
172 |
+
return logits_out, state_out
|
173 |
+
|
174 |
+
def eval_sequence(
|
175 |
+
self,
|
176 |
+
tokens: List[int],
|
177 |
+
state_in: Optional[NumpyArrayOrPyTorchTensor],
|
178 |
+
state_out: Optional[NumpyArrayOrPyTorchTensor] = None,
|
179 |
+
logits_out: Optional[NumpyArrayOrPyTorchTensor] = None,
|
180 |
+
use_numpy: bool = False
|
181 |
+
) -> Tuple[NumpyArrayOrPyTorchTensor, NumpyArrayOrPyTorchTensor]:
|
182 |
+
"""
|
183 |
+
Evaluates the model for a sequence of tokens.
|
184 |
+
|
185 |
+
NOTE ON GGML NODE LIMIT
|
186 |
+
|
187 |
+
ggml has a hard-coded limit on max amount of nodes in a computation graph. The sequence graph is built in a way that quickly exceedes
|
188 |
+
this limit when using large models and/or large sequence lengths.
|
189 |
+
Fortunately, rwkv.cpp's fork of ggml has increased limit which was tested to work for sequence lengths up to 64 for 14B models.
|
190 |
+
|
191 |
+
If you get `GGML_ASSERT: ...\\ggml.c:16941: cgraph->n_nodes < GGML_MAX_NODES`, this means you've exceeded the limit.
|
192 |
+
To get rid of the assertion failure, reduce the model size and/or sequence length.
|
193 |
+
|
194 |
+
In case of any error, this method will throw an exception.
|
195 |
+
|
196 |
+
Parameters
|
197 |
+
----------
|
198 |
+
tokens : List[int]
|
199 |
+
Indices of the next tokens to be seen by the model. Must be in range 0 <= token < n_vocab.
|
200 |
+
state_in : Optional[NumpyArrayOrTorchTensor]
|
201 |
+
State from previous call of this method. If this is a first pass, set it to None.
|
202 |
+
state_out : Optional[NumpyArrayOrTorchTensor]
|
203 |
+
Optional output tensor for state. If provided, must be of type float32, contiguous and of shape (state_buffer_element_count).
|
204 |
+
logits_out : Optional[NumpyArrayOrTorchTensor]
|
205 |
+
Optional output tensor for logits. If provided, must be of type float32, contiguous and of shape (logits_buffer_element_count).
|
206 |
+
use_numpy : bool
|
207 |
+
If set to True, numpy's ndarrays will be created instead of PyTorch's Tensors.
|
208 |
+
This parameter is ignored if any tensor parameter is not None; in such case,
|
209 |
+
type of returned tensors will match the type of received tensors.
|
210 |
+
|
211 |
+
Returns
|
212 |
+
-------
|
213 |
+
logits, state
|
214 |
+
Logits vector of shape (n_vocab); state for the next step.
|
215 |
+
"""
|
216 |
+
|
217 |
+
if not self._valid:
|
218 |
+
raise ValueError('Model was freed')
|
219 |
+
|
220 |
+
use_numpy = self._detect_numpy_usage([state_in, state_out, logits_out], use_numpy)
|
221 |
+
|
222 |
+
if state_in is not None:
|
223 |
+
self._validate_tensor(state_in, 'state_in', self._state_buffer_element_count)
|
224 |
+
|
225 |
+
state_in_ptr = self._get_data_ptr(state_in)
|
226 |
+
else:
|
227 |
+
state_in_ptr = 0
|
228 |
+
|
229 |
+
if state_out is not None:
|
230 |
+
self._validate_tensor(state_out, 'state_out', self._state_buffer_element_count)
|
231 |
+
else:
|
232 |
+
state_out = self._zeros_float32(self._state_buffer_element_count, use_numpy)
|
233 |
+
|
234 |
+
if logits_out is not None:
|
235 |
+
self._validate_tensor(logits_out, 'logits_out', self._logits_buffer_element_count)
|
236 |
+
else:
|
237 |
+
logits_out = self._zeros_float32(self._logits_buffer_element_count, use_numpy)
|
238 |
+
|
239 |
+
self._library.rwkv_eval_sequence(
|
240 |
+
self._ctx,
|
241 |
+
tokens,
|
242 |
+
state_in_ptr,
|
243 |
+
self._get_data_ptr(state_out),
|
244 |
+
self._get_data_ptr(logits_out)
|
245 |
+
)
|
246 |
+
|
247 |
+
return logits_out, state_out

    def eval_sequence_in_chunks(
        self,
        tokens: List[int],
        state_in: Optional[NumpyArrayOrPyTorchTensor],
        state_out: Optional[NumpyArrayOrPyTorchTensor] = None,
        logits_out: Optional[NumpyArrayOrPyTorchTensor] = None,
        chunk_size: int = 16,
        use_numpy: bool = False
    ) -> Tuple[NumpyArrayOrPyTorchTensor, NumpyArrayOrPyTorchTensor]:
        """
        Evaluates the model for a sequence of tokens using `eval_sequence`, splitting a potentially long sequence into fixed-length chunks.
        This function is useful for processing complete prompts and user input in chat & role-playing use-cases.
        It is recommended to use this function instead of `eval_sequence` to avoid mistakes and get maximum performance.

        Chunking allows processing sequences of thousands of tokens, while not reaching ggml's node limit and not consuming too much memory.
        A reasonable and recommended chunk size is 16. If you want maximum performance, try different chunk sizes in the range [2..64]
        and choose the one that works best in your use case.

        In case of any error, this method will throw an exception.

        Parameters
        ----------
        tokens : List[int]
            Indices of the next tokens to be seen by the model. Must be in range 0 <= token < n_vocab.
        state_in : Optional[NumpyArrayOrPyTorchTensor]
            State from the previous call of this method. If this is the first pass, set it to None.
        state_out : Optional[NumpyArrayOrPyTorchTensor]
            Optional output tensor for state. If provided, must be of type float32, contiguous and of shape (state_buffer_element_count,).
        logits_out : Optional[NumpyArrayOrPyTorchTensor]
            Optional output tensor for logits. If provided, must be of type float32, contiguous and of shape (logits_buffer_element_count,).
        chunk_size : int
            Size of each chunk in tokens, must be positive.
        use_numpy : bool
            If set to True, numpy ndarrays will be created instead of PyTorch Tensors.
            This parameter is ignored if any tensor parameter is not None; in that case,
            the type of the returned tensors will match the type of the received tensors.

        Returns
        -------
        logits, state
            Logits vector of shape (n_vocab,); state for the next step.
        """

        if not self._valid:
            raise ValueError('Model was freed')

        use_numpy = self._detect_numpy_usage([state_in, state_out, logits_out], use_numpy)

        if state_in is not None:
            self._validate_tensor(state_in, 'state_in', self._state_buffer_element_count)

            state_in_ptr = self._get_data_ptr(state_in)
        else:
            state_in_ptr = 0

        if state_out is not None:
            self._validate_tensor(state_out, 'state_out', self._state_buffer_element_count)
        else:
            state_out = self._zeros_float32(self._state_buffer_element_count, use_numpy)

        if logits_out is not None:
            self._validate_tensor(logits_out, 'logits_out', self._logits_buffer_element_count)
        else:
            logits_out = self._zeros_float32(self._logits_buffer_element_count, use_numpy)

        self._library.rwkv_eval_sequence_in_chunks(
            self._ctx,
            tokens,
            chunk_size,
            state_in_ptr,
            self._get_data_ptr(state_out),
            self._get_data_ptr(logits_out)
        )

        return logits_out, state_out

    def free(self) -> None:
        """
        Frees all allocated resources.
        In case of any error, this method will throw an exception.
        The object must not be used anymore after calling this method.
        """

        if not self._valid:
            raise ValueError('Already freed')

        self._valid = False

        self._library.rwkv_free(self._ctx)

    def __del__(self) -> None:
        # Free the context on GC in case the user forgot to call free() explicitly.
        if hasattr(self, '_valid') and self._valid:
            self.free()

    def _is_pytorch_tensor(self, tensor: NumpyArrayOrPyTorchTensor) -> bool:
        return hasattr(tensor, '__module__') and tensor.__module__ == 'torch'

    def _detect_numpy_usage(self, tensors: List[Optional[NumpyArrayOrPyTorchTensor]], use_numpy_by_default: bool) -> bool:
        # The first non-None tensor determines the tensor type for this call.
        for tensor in tensors:
            if tensor is not None:
                return not self._is_pytorch_tensor(tensor)

        return use_numpy_by_default

    def _validate_tensor(self, tensor: NumpyArrayOrPyTorchTensor, name: str, size: int) -> None:
        if self._is_pytorch_tensor(tensor):
            tensor: torch.Tensor = tensor

            if tensor.device != torch.device('cpu'):
                raise ValueError(f'{name} is not on CPU')
            if tensor.dtype != torch.float32:
                raise ValueError(f'{name} is not of type float32')
            if tensor.shape != (size,):
                raise ValueError(f'{name} has invalid shape {tensor.shape}, expected ({size},)')
            if not tensor.is_contiguous():
                raise ValueError(f'{name} is not contiguous')
        else:
            import numpy as np
            tensor: np.ndarray = tensor

            if tensor.dtype != np.float32:
                raise ValueError(f'{name} is not of type float32')
            if tensor.shape != (size,):
                raise ValueError(f'{name} has invalid shape {tensor.shape}, expected ({size},)')
            if not tensor.data.contiguous:
                raise ValueError(f'{name} is not contiguous')

    def _get_data_ptr(self, tensor: NumpyArrayOrPyTorchTensor):
        if self._is_pytorch_tensor(tensor):
            return tensor.data_ptr()
        else:
            return tensor.ctypes.data

    def _zeros_float32(self, element_count: int, use_numpy: bool) -> NumpyArrayOrPyTorchTensor:
        if use_numpy:
            import numpy as np
            return np.zeros(element_count, dtype=np.float32)
        else:
            return torch.zeros(element_count, dtype=torch.float32, device='cpu')
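
A minimal usage sketch for the model wrapper above (hedged: it assumes the `RWKVModel` class constructed earlier in rwkv_cpp_model (2).py accepts a library handle, a model path and a thread count; the model path and token ids are placeholders, not part of the upload):

# sketch_model_usage.py -- illustration only, not part of the upload
import rwkv_cpp_model
import rwkv_cpp_shared_library

library = rwkv_cpp_shared_library.load_rwkv_shared_library()
model = rwkv_cpp_model.RWKVModel(library, 'path/to/rwkv-model.bin', thread_count=4)

prompt_tokens = [510, 4234, 345]  # placeholder token ids from a tokenizer

# Process the whole prompt in chunks of 16 tokens; state_in=None starts fresh.
logits, state = model.eval_sequence_in_chunks(prompt_tokens, None, chunk_size=16)

# Greedy continuation for illustration; real code would call a sampler.
next_token = int(logits.argmax())
logits, state = model.eval_sequence_in_chunks([next_token], state)

model.free()  # release the native context explicitly rather than relying on __del__
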
rwkv_cpp_shared_library (2).py
ADDED
@@ -0,0 +1,450 @@
import os
import sys
import ctypes
import pathlib
import platform
from typing import Optional, List, Tuple, Callable

QUANTIZED_FORMAT_NAMES: Tuple[str, str, str, str, str] = (
    'Q4_0',
    'Q4_1',
    'Q5_0',
    'Q5_1',
    'Q8_0'
)

P_FLOAT = ctypes.POINTER(ctypes.c_float)
P_INT = ctypes.POINTER(ctypes.c_int32)

class RWKVContext:

    def __init__(self, ptr: ctypes.pointer) -> None:
        self.ptr: ctypes.pointer = ptr

class RWKVSharedLibrary:
    """
    Python wrapper around the rwkv.cpp shared library.
    """

    def __init__(self, shared_library_path: str) -> None:
        """
        Loads the shared library from the specified file.
        In case of any error, this method will throw an exception.

        Parameters
        ----------
        shared_library_path : str
            Path to the rwkv.cpp shared library. On Windows, it would look like 'rwkv.dll'; on Linux, 'librwkv.so'; on macOS, 'librwkv.dylib'.
        """
        # On Python 3.8+, custom DLLs need to be loaded with an explicit winmode
        # to prevent loading failure errors; see
        # https://docs.python.org/3/whatsnew/3.8.html#ctypes
        if platform.system().lower() == 'windows':
            self.library = ctypes.CDLL(shared_library_path, winmode=0)
        else:
            self.library = ctypes.cdll.LoadLibrary(shared_library_path)

        self.library.rwkv_init_from_file.argtypes = [ctypes.c_char_p, ctypes.c_uint32]
        self.library.rwkv_init_from_file.restype = ctypes.c_void_p

        self.library.rwkv_gpu_offload_layers.argtypes = [ctypes.c_void_p, ctypes.c_uint32]
        self.library.rwkv_gpu_offload_layers.restype = ctypes.c_bool

        self.library.rwkv_eval.argtypes = [
            ctypes.c_void_p, # ctx
            ctypes.c_int32,  # token
            P_FLOAT,         # state_in
            P_FLOAT,         # state_out
            P_FLOAT          # logits_out
        ]
        self.library.rwkv_eval.restype = ctypes.c_bool

        self.library.rwkv_eval_sequence.argtypes = [
            ctypes.c_void_p, # ctx
            P_INT,           # tokens
            ctypes.c_size_t, # token count
            P_FLOAT,         # state_in
            P_FLOAT,         # state_out
            P_FLOAT          # logits_out
        ]
        self.library.rwkv_eval_sequence.restype = ctypes.c_bool

        self.library.rwkv_eval_sequence_in_chunks.argtypes = [
            ctypes.c_void_p, # ctx
            P_INT,           # tokens
            ctypes.c_size_t, # token count
            ctypes.c_size_t, # chunk size
            P_FLOAT,         # state_in
            P_FLOAT,         # state_out
            P_FLOAT          # logits_out
        ]
        self.library.rwkv_eval_sequence_in_chunks.restype = ctypes.c_bool

        self.library.rwkv_get_n_vocab.argtypes = [ctypes.c_void_p]
        self.library.rwkv_get_n_vocab.restype = ctypes.c_size_t

        self.library.rwkv_get_n_embed.argtypes = [ctypes.c_void_p]
        self.library.rwkv_get_n_embed.restype = ctypes.c_size_t

        self.library.rwkv_get_n_layer.argtypes = [ctypes.c_void_p]
        self.library.rwkv_get_n_layer.restype = ctypes.c_size_t

        self.library.rwkv_get_state_buffer_element_count.argtypes = [ctypes.c_void_p]
        self.library.rwkv_get_state_buffer_element_count.restype = ctypes.c_uint32

        self.library.rwkv_get_logits_buffer_element_count.argtypes = [ctypes.c_void_p]
        self.library.rwkv_get_logits_buffer_element_count.restype = ctypes.c_uint32

        self.library.rwkv_free.argtypes = [ctypes.c_void_p]
        self.library.rwkv_free.restype = None

        self.library.rwkv_quantize_model_file.argtypes = [ctypes.c_char_p, ctypes.c_char_p, ctypes.c_char_p]
        self.library.rwkv_quantize_model_file.restype = ctypes.c_bool

        self.library.rwkv_get_system_info_string.argtypes = []
        self.library.rwkv_get_system_info_string.restype = ctypes.c_char_p

        self.nullptr = ctypes.cast(0, ctypes.c_void_p)

    def rwkv_init_from_file(self, model_file_path: str, thread_count: int) -> RWKVContext:
        """
        Loads the model from a file and prepares it for inference.
        Throws an exception in case of any error. Error messages would be printed to stderr.

        Parameters
        ----------
        model_file_path : str
            Path to model file in ggml format.
        thread_count : int
            Count of threads to use, must be positive.
        """

        ptr = self.library.rwkv_init_from_file(model_file_path.encode('utf-8'), ctypes.c_uint32(thread_count))

        if ptr is None:
            raise ValueError('rwkv_init_from_file failed, check stderr')

        return RWKVContext(ptr)

    def rwkv_gpu_offload_layers(self, ctx: RWKVContext, layer_count: int) -> bool:
        """
        Offloads the specified count of model layers onto the GPU. Offloaded layers are evaluated using cuBLAS or CLBlast.
        For the purposes of this function, the model head (unembedding matrix) is treated as an additional layer:
        - pass `rwkv_get_n_layer(ctx)` to offload all layers except the model head
        - pass `rwkv_get_n_layer(ctx) + 1` to offload all layers, including the model head
        Returns True if at least one layer was offloaded.
        If rwkv.cpp was compiled without cuBLAS and CLBlast support, this function is a no-op and always returns False.

        Parameters
        ----------
        ctx : RWKVContext
            RWKV context obtained from rwkv_init_from_file.
        layer_count : int
            Count of layers to offload onto the GPU, must be >= 0.
        """

        if layer_count < 0:
            raise ValueError('Layer count must be >= 0')

        return self.library.rwkv_gpu_offload_layers(ctx.ptr, ctypes.c_uint32(layer_count))

    def rwkv_eval(
        self,
        ctx: RWKVContext,
        token: int,
        state_in_address: Optional[int],
        state_out_address: int,
        logits_out_address: int
    ) -> None:
        """
        Evaluates the model for a single token.
        Throws an exception in case of any error. Error messages would be printed to stderr.
        Not thread-safe. For parallel inference, call rwkv_clone_context to create one rwkv_context for each thread.

        Parameters
        ----------
        ctx : RWKVContext
            RWKV context obtained from rwkv_init_from_file.
        token : int
            Next token index, in range 0 <= token < n_vocab.
        state_in_address : Optional[int]
            Address of the first element of a FP32 buffer of size rwkv_get_state_buffer_element_count; or None, if this is the first pass.
        state_out_address : int
            Address of the first element of a FP32 buffer of size rwkv_get_state_buffer_element_count. This buffer will be written to.
        logits_out_address : int
            Address of the first element of a FP32 buffer of size rwkv_get_logits_buffer_element_count. This buffer will be written to.
        """

        if not self.library.rwkv_eval(
            ctx.ptr,
            ctypes.c_int32(token),
            ctypes.cast(0 if state_in_address is None else state_in_address, P_FLOAT),
            ctypes.cast(state_out_address, P_FLOAT),
            ctypes.cast(logits_out_address, P_FLOAT)
        ):
            raise ValueError('rwkv_eval failed, check stderr')

    def rwkv_eval_sequence(
        self,
        ctx: RWKVContext,
        tokens: List[int],
        state_in_address: Optional[int],
        state_out_address: int,
        logits_out_address: int
    ) -> None:
        """
        Evaluates the model for a sequence of tokens.
        Uses a faster algorithm than `rwkv_eval` if you do not need the state and logits for every token. Best used with sequence lengths of 64 or so.
        Has to build a computation graph on the first call for a given sequence, but will use this cached graph for subsequent calls of the same sequence length.

        NOTE ON GGML NODE LIMIT

        ggml has a hard-coded limit on the max amount of nodes in a computation graph. The sequence graph is built in a way that quickly exceeds
        this limit when using large models and/or large sequence lengths.
        Fortunately, rwkv.cpp's fork of ggml has an increased limit, which was tested to work for sequence lengths up to 64 for 14B models.

        If you get `GGML_ASSERT: ...\\ggml.c:16941: cgraph->n_nodes < GGML_MAX_NODES`, this means you've exceeded the limit.
        To get rid of the assertion failure, reduce the model size and/or sequence length.

        Not thread-safe. For parallel inference, call `rwkv_clone_context` to create one rwkv_context for each thread.
        Throws an exception in case of any error. Error messages would be printed to stderr.

        Parameters
        ----------
        ctx : RWKVContext
            RWKV context obtained from rwkv_init_from_file.
        tokens : List[int]
            Next token indices, in range 0 <= token < n_vocab.
        state_in_address : Optional[int]
            Address of the first element of a FP32 buffer of size rwkv_get_state_buffer_element_count; or None, if this is the first pass.
        state_out_address : int
            Address of the first element of a FP32 buffer of size rwkv_get_state_buffer_element_count. This buffer will be written to.
        logits_out_address : int
            Address of the first element of a FP32 buffer of size rwkv_get_logits_buffer_element_count. This buffer will be written to.
        """

        if not self.library.rwkv_eval_sequence(
            ctx.ptr,
            ctypes.cast((ctypes.c_int32 * len(tokens))(*tokens), P_INT),
            ctypes.c_size_t(len(tokens)),
            ctypes.cast(0 if state_in_address is None else state_in_address, P_FLOAT),
            ctypes.cast(state_out_address, P_FLOAT),
            ctypes.cast(logits_out_address, P_FLOAT)
        ):
            raise ValueError('rwkv_eval_sequence failed, check stderr')

    def rwkv_eval_sequence_in_chunks(
        self,
        ctx: RWKVContext,
        tokens: List[int],
        chunk_size: int,
        state_in_address: Optional[int],
        state_out_address: int,
        logits_out_address: int
    ) -> None:
        """
        Evaluates the model for a sequence of tokens using `rwkv_eval_sequence`, splitting a potentially long sequence into fixed-length chunks.
        This function is useful for processing complete prompts and user input in chat & role-playing use-cases.
        It is recommended to use this function instead of `rwkv_eval_sequence` to avoid mistakes and get maximum performance.

        Chunking allows processing sequences of thousands of tokens, while not reaching ggml's node limit and not consuming too much memory.
        A reasonable and recommended chunk size is 16. If you want maximum performance, try different chunk sizes in the range [2..64]
        and choose the one that works best in your use case.

        Not thread-safe. For parallel inference, call `rwkv_clone_context` to create one rwkv_context for each thread.
        Throws an exception in case of any error. Error messages would be printed to stderr.

        Parameters
        ----------
        ctx : RWKVContext
            RWKV context obtained from rwkv_init_from_file.
        tokens : List[int]
            Next token indices, in range 0 <= token < n_vocab.
        chunk_size : int
            Size of each chunk in tokens, must be positive.
        state_in_address : Optional[int]
            Address of the first element of a FP32 buffer of size rwkv_get_state_buffer_element_count; or None, if this is the first pass.
        state_out_address : int
            Address of the first element of a FP32 buffer of size rwkv_get_state_buffer_element_count. This buffer will be written to.
        logits_out_address : int
            Address of the first element of a FP32 buffer of size rwkv_get_logits_buffer_element_count. This buffer will be written to.
        """

        if not self.library.rwkv_eval_sequence_in_chunks(
            ctx.ptr,
            ctypes.cast((ctypes.c_int32 * len(tokens))(*tokens), P_INT),
            ctypes.c_size_t(len(tokens)),
            ctypes.c_size_t(chunk_size),
            ctypes.cast(0 if state_in_address is None else state_in_address, P_FLOAT),
            ctypes.cast(state_out_address, P_FLOAT),
            ctypes.cast(logits_out_address, P_FLOAT)
        ):
            raise ValueError('rwkv_eval_sequence_in_chunks failed, check stderr')

    def rwkv_get_n_vocab(self, ctx: RWKVContext) -> int:
        """
        Returns the number of tokens in the given model's vocabulary.
        Useful for telling 20B_tokenizer models (n_vocab = 50277) apart from World models (n_vocab = 65536).

        Parameters
        ----------
        ctx : RWKVContext
            RWKV context obtained from rwkv_init_from_file.
        """

        return self.library.rwkv_get_n_vocab(ctx.ptr)

    def rwkv_get_n_embed(self, ctx: RWKVContext) -> int:
        """
        Returns the number of elements in the given model's embedding.
        Useful for reading individual fields of a model's hidden state.

        Parameters
        ----------
        ctx : RWKVContext
            RWKV context obtained from rwkv_init_from_file.
        """

        return self.library.rwkv_get_n_embed(ctx.ptr)

    def rwkv_get_n_layer(self, ctx: RWKVContext) -> int:
        """
        Returns the number of layers in the given model.
        A layer is a pair of RWKV and FFN operations, stacked multiple times throughout the model.
        Embedding matrix and model head (unembedding matrix) are NOT counted in `n_layer`.
        Useful for always offloading the entire model to GPU.

        Parameters
        ----------
        ctx : RWKVContext
            RWKV context obtained from rwkv_init_from_file.
        """

        return self.library.rwkv_get_n_layer(ctx.ptr)

    def rwkv_get_state_buffer_element_count(self, ctx: RWKVContext) -> int:
        """
        Returns count of FP32 elements in state buffer.

        Parameters
        ----------
        ctx : RWKVContext
            RWKV context obtained from rwkv_init_from_file.
        """

        return self.library.rwkv_get_state_buffer_element_count(ctx.ptr)

    def rwkv_get_logits_buffer_element_count(self, ctx: RWKVContext) -> int:
        """
        Returns count of FP32 elements in logits buffer.

        Parameters
        ----------
        ctx : RWKVContext
            RWKV context obtained from rwkv_init_from_file.
        """

        return self.library.rwkv_get_logits_buffer_element_count(ctx.ptr)

    def rwkv_free(self, ctx: RWKVContext) -> None:
        """
        Frees all allocated memory and the context.

        Parameters
        ----------
        ctx : RWKVContext
            RWKV context obtained from rwkv_init_from_file.
        """

        self.library.rwkv_free(ctx.ptr)

        ctx.ptr = self.nullptr

    def rwkv_quantize_model_file(self, model_file_path_in: str, model_file_path_out: str, format_name: str) -> None:
        """
        Quantizes an FP32 or FP16 model to one of the quantized formats.
        Throws an exception in case of any error. Error messages would be printed to stderr.

        Parameters
        ----------
        model_file_path_in : str
            Path to model file in ggml format, must be either FP32 or FP16.
        model_file_path_out : str
            Quantized model will be written here.
        format_name : str
            One of QUANTIZED_FORMAT_NAMES.
        """

        if format_name not in QUANTIZED_FORMAT_NAMES:
            raise ValueError(f'Unknown format name {format_name}, use one of {QUANTIZED_FORMAT_NAMES}')

        if not self.library.rwkv_quantize_model_file(
            model_file_path_in.encode('utf-8'),
            model_file_path_out.encode('utf-8'),
            format_name.encode('utf-8')
        ):
            raise ValueError('rwkv_quantize_model_file failed, check stderr')

    def rwkv_get_system_info_string(self) -> str:
        """
        Returns the system information string.
        """

        return self.library.rwkv_get_system_info_string().decode('utf-8')

def load_rwkv_shared_library() -> RWKVSharedLibrary:
    """
    Attempts to find the rwkv.cpp shared library and load it.
    To specify the exact path to the library, create an instance of RWKVSharedLibrary explicitly.
    """

    file_name: str

    if 'win32' in sys.platform or 'cygwin' in sys.platform:
        file_name = 'rwkv.dll'
    elif 'darwin' in sys.platform:
        file_name = 'librwkv.dylib'
    else:
        file_name = 'librwkv.so'

    # Possible sub-paths to the library relative to the repo dir.
    child_paths: List[Callable[[pathlib.Path], pathlib.Path]] = [
        # No lookup for the Debug config here.
        # I assume that if a user wants to debug the library,
        # they will be able to find the library and set the exact path explicitly.
        lambda p: p / 'bin' / 'Release' / file_name,
        lambda p: p / 'bin' / file_name,
        # Some people prefer to build in the "build" subdirectory.
        lambda p: p / 'build' / 'bin' / 'Release' / file_name,
        lambda p: p / 'build' / 'bin' / file_name,
        lambda p: p / 'build' / file_name,
        # Fallback.
        lambda p: p / file_name
    ]

    working_dir: pathlib.Path = pathlib.Path(os.path.abspath(os.getcwd()))

    parent_paths: List[pathlib.Path] = [
        # Possible repo dirs relative to the working dir.
        # ./python/rwkv_cpp
        working_dir.parent.parent,
        # ./python
        working_dir.parent,
        # .
        working_dir,
        # Repo dir relative to this Python file.
        pathlib.Path(os.path.abspath(__file__)).parent.parent.parent
    ]

    for parent_path in parent_paths:
        for child_path in child_paths:
            full_path: pathlib.Path = child_path(parent_path)

            if os.path.isfile(full_path):
                return RWKVSharedLibrary(str(full_path))

    raise ValueError(f'Failed to find {file_name} automatically; '
                     f'find the library manually and create an RWKVSharedLibrary with its exact path')
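
A low-level usage sketch for the wrapper above (hedged: the model path and token ids are placeholders; buffer sizes are taken from the ctx getters, matching the buffer contract documented in rwkv_eval):

# sketch_shared_library_usage.py -- illustration only, not part of the upload
import numpy as np
from rwkv_cpp_shared_library import load_rwkv_shared_library

lib = load_rwkv_shared_library()
ctx = lib.rwkv_init_from_file('path/to/rwkv-model.bin', thread_count=4)

state_a = np.zeros(lib.rwkv_get_state_buffer_element_count(ctx), dtype=np.float32)
state_b = np.zeros_like(state_a)
logits = np.zeros(lib.rwkv_get_logits_buffer_element_count(ctx), dtype=np.float32)

# First pass: state_in_address=None starts from an empty state.
lib.rwkv_eval(ctx, 0, None, state_a.ctypes.data, logits.ctypes.data)
# Subsequent passes feed the previous state buffer back in.
lib.rwkv_eval(ctx, 1, state_a.ctypes.data, state_b.ctypes.data, logits.ctypes.data)

lib.rwkv_free(ctx)
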
rwkv_world_tokenizer (2).py
ADDED
@@ -0,0 +1,126 @@
import os
import pathlib
from typing import List, Set, Tuple, Callable

# Taken from https://github.com/BlinkDL/ChatRWKV/tree/main/tokenizer/rwkv_tokenizer.py

class Trie:
    __slots__ = ('ch', 'to', 'values', 'front')

    def __init__(self, front=None, ch=None) -> None:
        self.ch = ch
        self.to: List = [None for _ in range(256)]
        self.values: Set = set()
        self.front = front

    def add(self, key: bytes, idx: int = 0, val=None) -> 'Trie':
        if idx == len(key):
            if val is None:
                val = key

            self.values.add(val)

            return self

        ch = key[idx]

        if self.to[ch] is None:
            self.to[ch] = Trie(front=self, ch=ch)

        return self.to[ch].add(key, idx=idx + 1, val=val)

    def find_longest(self, key: bytes, idx: int = 0) -> Tuple[int, 'Trie', set]:
        u: Trie = self
        ch: int = key[idx]
        ret = None

        while u.to[ch] is not None:
            u = u.to[ch]
            idx += 1

            if u.values:
                ret = idx, u, u.values

            if idx == len(key):
                break

            ch = key[idx]

        if ret is None:
            raise ValueError('Entry not found')

        return ret

    def __repr__(self) -> str:
        fr = self
        ret = []

        while fr is not None:
            if fr.ch is not None:
                ret.append(fr.ch)

            fr = fr.front

        return '<TRIE %s %s>' % (ret[::-1], self.values)

class WorldTokenizer:

    def __init__(self, file_path) -> None:
        self.index_to_token = {}

        with open(file_path, 'r', encoding='utf-8') as f:
            lines = f.readlines()

        for line in lines:
            idx = int(line[:line.index(' ')])
            # Each vocabulary line stores the token as a Python str/bytes literal.
            x = eval(line[line.index(' '):line.rindex(' ')])
            x = x.encode('utf-8') if isinstance(x, str) else x
            assert isinstance(x, bytes)
            assert len(x) == int(line[line.rindex(' '):])
            self.index_to_token[idx] = x

        self.token_to_index = {}

        for k, v in self.index_to_token.items():
            self.token_to_index[v] = int(k)

        self.root = Trie()

        for t, i in self.token_to_index.items():
            _ = self.root.add(t, val=(t, i))

    def encode_bytes(self, src: bytes) -> List[int]:
        idx: int = 0
        tokens: List[int] = []

        while idx < len(src):
            _idx: int = idx
            idx, _, values = self.root.find_longest(src, idx)
            assert idx != _idx
            _, token = next(iter(values))
            tokens.append(token)

        return tokens

    def decode_bytes(self, tokens: List[int]) -> bytes:
        return b''.join(map(lambda i: self.index_to_token[i], tokens))

    def encode(self, src: str) -> List[int]:
        return self.encode_bytes(src.encode('utf-8'))

    def decode(self, tokens: List[int]) -> str:
        # The 'replace' error handling mode will insert \uFFFD characters in place of malformed/partial UTF-8 sequences.
        # Downstream code needs to detect \uFFFD and postpone decoding until more tokens arrive and the UTF-8 sequences are complete.
        return self.decode_bytes(tokens).decode('utf-8', errors='replace')

def get_world_tokenizer_v20230424() -> Tuple[
    Callable[[List[int]], str],
    Callable[[str], List[int]]
]:
    """
    Loads the default World tokenizer, commonly used in RWKV v4 World models.
    Returns a tuple of `decode` and `encode` functions.
    """
    parent: pathlib.Path = pathlib.Path(os.path.abspath(__file__)).parent
    tokenizer: WorldTokenizer = WorldTokenizer(parent / 'rwkv_vocab_v20230424.txt')
    return tokenizer.decode, tokenizer.encode
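
A roundtrip sketch for the World tokenizer (hedged: it assumes rwkv_vocab_v20230424.txt sits next to rwkv_world_tokenizer.py, which is what get_world_tokenizer_v20230424 expects):

# sketch_tokenizer_roundtrip.py -- illustration only, not part of the upload
from rwkv_world_tokenizer import get_world_tokenizer_v20230424

decode, encode = get_world_tokenizer_v20230424()

tokens = encode('Hello, world!')
assert decode(tokens) == 'Hello, world!'

# When streaming token-by-token, decode() may emit U+FFFD for a partial
# UTF-8 sequence; buffer tokens until the replacement character disappears.
text_so_far = decode(tokens[:1])
if '\uFFFD' in text_so_far:
    pass  # wait for more tokens before displaying
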
sampling (2).py
ADDED
@@ -0,0 +1,52 @@
import numpy as np
from typing import Dict, Optional

# https://stackoverflow.com/a/50425683
def softmax(x: np.ndarray, axis: int) -> np.ndarray:
    # Subtracting the max keeps exp() numerically stable and does not change the result.
    # Copy instead of mutating in place, so the caller's logits array stays intact.
    x = x - x.max(axis=axis, keepdims=True)
    e: np.ndarray = np.exp(x)
    return e / e.sum(axis=axis, keepdims=True)

def sample_logits(out, temperature: float = 1.0, top_p: float = 0.8, logit_bias: Optional[Dict[int, float]] = None) -> int:
    if hasattr(out, '__module__') and out.__module__ == 'torch':
        out = out.cpu().numpy()

    probs: np.ndarray = softmax(out, axis=-1)

    return sample_probs(probs, temperature, top_p, logit_bias)

def sample_probs(probs: np.ndarray, temperature: float = 1.0, top_p: float = 0.8, logit_bias: Optional[Dict[int, float]] = None) -> int:
    if not (0.0 <= temperature):
        raise ValueError('temperature must be >= 0')
    if not (0.0 <= top_p <= 1.0):
        raise ValueError('top_p must be in [0, 1]')

    if top_p == 0.0:
        top_p = 1.0

    if logit_bias is not None and len(logit_bias) > 0:
        logits: np.ndarray = np.log(probs)

        ids, values = zip(*logit_bias.items())
        logits[list(ids)] += values

        # Makes the calculation more numerically stable, does not change the result
        logits -= logits.max(axis=-1, keepdims=True)

        probs = np.exp(logits) / np.sum(np.exp(logits))

    if temperature == 0.0:
        return np.argmax(probs).item()

    if top_p < 1.0:
        # Zero out everything below the smallest probability needed to cover top_p mass.
        sorted_probs = np.sort(probs)[::-1]
        cumulative_probs = np.cumsum(sorted_probs)
        cutoff = float(sorted_probs[np.argmax(cumulative_probs > top_p)])
        probs[probs < cutoff] = 0

    if temperature != 1.0:
        probs = np.power(probs, 1.0 / temperature)

    probs = probs / np.sum(probs)

    return int(np.random.choice(a=len(probs), p=probs))
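
A sampling sketch (hedged: the logits array is a random stand-in for real model output, and the token id in logit_bias is arbitrary):

# sketch_sampling.py -- illustration only, not part of the upload
import numpy as np
from sampling import sample_logits

logits = np.random.randn(65536).astype(np.float32)  # stand-in for model logits

greedy_token = sample_logits(logits, temperature=0.0)               # argmax pick
creative_token = sample_logits(logits, temperature=1.2, top_p=0.5)  # nucleus sampling

# logit_bias nudges specific token ids; here token 0 is discouraged.
biased_token = sample_logits(logits, logit_bias={0: -5.0})
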
tokenizer_util (2).py
ADDED
@@ -0,0 +1,38 @@
import os
import pathlib
import rwkv_world_tokenizer
from typing import List, Tuple, Callable

def add_tokenizer_argument(parser) -> None:
    parser.add_argument(
        'tokenizer',
        help='Tokenizer to use; supported tokenizers: auto (guess from n_vocab), 20B, world',
        nargs='?',
        type=str,
        default='auto'
    )

def get_tokenizer(tokenizer_name: str, n_vocab: int) -> Tuple[
    Callable[[List[int]], str],
    Callable[[str], List[int]]
]:
    if tokenizer_name == 'auto':
        if n_vocab == 50277:
            tokenizer_name = '20B'
        elif n_vocab == 65536:
            tokenizer_name = 'world'
        else:
            raise ValueError(f'Cannot guess the tokenizer from the n_vocab value of {n_vocab}')

    parent: pathlib.Path = pathlib.Path(os.path.abspath(__file__)).parent

    if tokenizer_name == 'world':
        print('Loading World v20230424 tokenizer')
        return rwkv_world_tokenizer.get_world_tokenizer_v20230424()
    elif tokenizer_name == '20B':
        print('Loading 20B tokenizer')
        import tokenizers
        tokenizer: tokenizers.Tokenizer = tokenizers.Tokenizer.from_file(str(parent / '20B_tokenizer.json'))
        return tokenizer.decode, lambda x: tokenizer.encode(x).ids
    else:
        raise ValueError(f'Unknown tokenizer {tokenizer_name}')
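
A CLI wiring sketch for the helper above (hedged: the script name, the model-path argument, and the hard-coded n_vocab of 65536 are illustrative; a real script would read n_vocab from the loaded model via rwkv_get_n_vocab):

# sketch_cli.py -- illustration only, not part of the upload
import argparse
import tokenizer_util

parser = argparse.ArgumentParser()
parser.add_argument('model_path', help='Path to the ggml model file')
tokenizer_util.add_tokenizer_argument(parser)
args = parser.parse_args()

# With the optional positional tokenizer argument left at 'auto',
# n_vocab == 65536 selects the World tokenizer and 50277 selects the 20B one.
decode, encode = tokenizer_util.get_tokenizer(args.tokenizer, n_vocab=65536)

tokens = encode('Hello')
print(tokens, decode(tokens))
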