Spaces:

ashmib
/

gemma-gemini-eu-travels

Running

App Files Files Community

Ashmi Banerjee committed on Mar 25, 2024

Commit

420fa8a

1 Parent(s): b6dfdd7

updates with gemini

Browse files

Files changed (7) hide show

.gitignore +184 -0
build_rag.py +53 -0
models/gemini.py +19 -0
models/gemma.py +21 -0
requirements.txt +7 -1
setups/db_setup.py +27 -0
setups/vertex_ai_setup.py +20 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,184 @@

+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+.idea/
+.ipynb_checkpoints
+config
+notebooks/.ipynb_checkpoints
+.DS_Store
+notebooks/.DS_Store
+notebooks/.*
+scripts/.*
+data/embeddings/
+review-analysis-palm/data/
+gemma/data/
+config/
+gemma/flagged/
+gemma/.ipynb_checkpoints/
+gemma/.DS_Store
+gemma/notebooks/.ipynb_checkpoints/
+gemini/.ipynb_checkpoints/
+.github/
+gradio_cached_examples/
+.idea/
+__pycache__/
+gradio_cached_examples/
+models/__pycache__/
+setups/__pycache__/
+setups/gcp-default-creds.json

build_rag.py ADDED Viewed

	@@ -0,0 +1,53 @@

+from sentence_transformers import SentenceTransformer
+from setups.db_setup import get_mongo_client, get_mongo_url
+def get_embedding(text: str) -> list[float]:
+    embedding_model = SentenceTransformer("thenlper/gte-large")
+    if not text.strip():
+        print("Attempted to get embedding for empty text.")
+        return []
+    embedding = embedding_model.encode(text)
+    return embedding.tolist()
+def query_results(query, mongo_url):
+    mongo_client = get_mongo_client(mongo_url)
+    db = mongo_client["EU_Cities"]
+    query_embedding = get_embedding(query)
+    results = db.EU_cities_collection.aggregate([
+        {
+            "$vectorSearch": {
+                "index": "vector_index",
+                "path": "embedding",
+                "queryVector": query_embedding,
+                "numCandidates": 150,
+                "limit": 5
+            }
+        }
+    ])
+    return results
+def get_search_result(query, mongo_url):
+    get_knowledge = query_results(query, mongo_url)
+    print(get_knowledge)
+    search_result = ""
+    for result in get_knowledge:
+        search_result += f"City: {result.get('city', 'N/A')}, Abstract: {result.get('combined', 'N/A')}\n"
+    return search_result
+def get_context(query: str) -> str:
+    mongo_url = get_mongo_url()
+    source_information = get_search_result(query, mongo_url)
+    combined_information = (
+        f"Query: {query}\nContinue to answer the query by using the Search Results:\n{source_information}."
+    )
+    return combined_information

models/gemini.py ADDED Viewed

	@@ -0,0 +1,19 @@

+from typing import Optional
+import sys
+sys.path.append("../")
+from setups.vertex_ai_setup import initialize_vertexai_params
+from vertexai.preview.generative_models import GenerativeModel
+def get_gemini_response(prompt_text, model, parameters: Optional = None) -> str:
+    initialize_vertexai_params()
+    if model is None or parameters is None:
+        model = "gemini-1.0-pro"
+        parameters = {
+            "max_output_tokens": 1024,
+            "temperature": 0.2
+        }
+    model = GenerativeModel.from_pretrained(model, **parameters)
+    model_response = model.generate_content(prompt_text)
+    return model_response

models/gemma.py ADDED Viewed

	@@ -0,0 +1,21 @@

+import os
+from dotenv import load_dotenv
+from huggingface_hub import InferenceClient
+load_dotenv()
+def gemma_predict(combined_information, model_name):
+    HF_token = os.environ["HF_TOKEN"]
+    client = InferenceClient(model_name, token=HF_token)
+    stream = client.text_generation(prompt=combined_information, details=True, stream=True, max_new_tokens=2048,
+                                    return_full_text=False)
+    output = ""
+    for response in stream:
+        output += response.token.text
+    if "<eos>" in output:
+        output = output.split("<eos>")[0]
+    return output

requirements.txt CHANGED Viewed

@@ -1,4 +1,10 @@
 sentence-transformers==2.5.1
 gradio==3.45.0
 gradio_client==0.5.2
-pymongo==4.6.2

 sentence-transformers==2.5.1
 gradio==3.45.0
 gradio_client==0.5.2
+pymongo==4.6.2
+python-dotenv
+google-cloud-aiplatform
+google-cloud
+vertexai==1.43.0
+huggingface_hub==0.22.0
+certifi==2021.5.30

setups/db_setup.py ADDED Viewed

	@@ -0,0 +1,27 @@

+import certifi
+import pymongo
+import os
+from dotenv import load_dotenv
+load_dotenv()
+def get_mongo_client(mongo_url):
+    """Establish connection to the MongoDB."""
+    if not mongo_url:
+        print("MONGO_URI not set in environment variables")
+    try:
+        client = pymongo.MongoClient(mongo_url, tlsCAFile=certifi.where())
+        print("Connection to MongoDB successful")
+        return client
+    except pymongo.errors.ConnectionFailure as e:
+        print(f"Connection failed: {e}")
+        return None
+def get_mongo_url():
+    username = os.environ["MONGO_USERNAME"]
+    password = os.environ["MONGO_PW"]
+    mongo_url = f"mongodb+srv://{username}:{password}@cluster0.62unmco.mongodb.net/"
+    return mongo_url

setups/vertex_ai_setup.py ADDED Viewed

	@@ -0,0 +1,20 @@

+from typing import Optional
+from dotenv import load_dotenv
+from google.oauth2 import service_account
+import vertexai
+import os
+# Import-time configuration for Vertex AI access.
+load_dotenv()
+# TODO: fix it in spaces
+# NOTE(review): this assignment replaces any GOOGLE_APPLICATION_CREDENTIALS
+# value already present in the environment, and the path is relative, so it
+# is resolved against the current working directory - confirm this is intended.
+os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "gcp-default-creds.json"
+GOOGLE_APPLICATION_CREDENTIALS = os.environ["GOOGLE_APPLICATION_CREDENTIALS"]
+# Importing this module raises KeyError when VERTEXAI_PROJECT is not set.
+VERTEXAI_PROJECT = os.environ["VERTEXAI_PROJECT"]
+def initialize_vertexai_params(location: Optional[str] = "us-central1"):
+    service_account.Credentials.from_service_account_file(
+        filename=GOOGLE_APPLICATION_CREDENTIALS,
+        scopes=["https://www.googleapis.com/auth/cloud-platform"],
+    )
+    vertexai.init(project=VERTEXAI_PROJECT, location=location)