File size: 2,716 Bytes
3e22f77
31ae1e9
3e22f77
7a321ee
3e22f77
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7a321ee
3e22f77
 
7a321ee
3e22f77
 
7a321ee
3e22f77
 
 
 
 
 
 
 
 
 
e0e99ed
3e22f77
31ae1e9
 
 
 
 
 
 
 
 
 
7a321ee
31ae1e9
 
 
3e22f77
 
7a321ee
 
 
 
 
 
3e22f77
 
7a321ee
 
3e22f77
7a321ee
 
 
31ae1e9
7a321ee
 
 
 
 
3e22f77
7a321ee
e804a95
 
 
6534a76
7a321ee
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import re

import gradio as gr
import pandas as pd
import requests
import torch
from transformers import AutoModelForSequenceClassification
from transformers import AutoTokenizer
from transformers import pipeline

def process_tweet(tweet):
    """Normalize a raw tweet before it is passed to the models.

    Steps, in order: drop links, drop @mentions, collapse whitespace runs to
    a single space, unwrap hashtags (``#word`` -> ``word``), and strip
    leading/trailing quote characters.

    Args:
        tweet: Raw tweet text.

    Returns:
        The cleaned text. Only quote characters are trimmed from the ends, so
        a single space left behind by a removed leading/trailing link or
        mention is kept.
    """
    # remove links ("www." must be followed by non-whitespace to be a URL)
    tweet = re.sub(r'(www\.[^\s]+)|(https?://[^\s]+)', '', tweet)
    # remove usernames
    tweet = re.sub(r'@[^\s]+', '', tweet)
    # remove additional white spaces
    tweet = re.sub(r'[\s]+', ' ', tweet)
    # replace hashtags with words
    tweet = re.sub(r'#([^\s]+)', r'\1', tweet)
    # trim surrounding single/double quotes
    tweet = tweet.strip('\'"')
    return tweet

# Tokenizer loaded from the same checkpoint name as the coordinates model;
# it encodes the tweet text before the coordinates model is called.
tokenizer = AutoTokenizer.from_pretrained("azamat/geocoder_coordinates_model")

# Pipeline whose first result supplies the relevancy label and score.
relevancy_pipeline = pipeline(
    task="sentiment-analysis",
    model="azamat/geocoder_relevancy_model",
)

# Model whose first two output values are read as (lat, lon).
coordinates_model = AutoModelForSequenceClassification.from_pretrained(
    "azamat/geocoder_coordinates_model"
)

def predict_relevancy(text):
    """Classify ``text`` with the relevancy pipeline.

    Args:
        text: Tweet text to classify.

    Returns:
        A ``(label, score)`` tuple taken from the first pipeline result.
    """
    first_result = relevancy_pipeline(text)[0]
    return first_result['label'], first_result['score']

def predict_coordinates(text):
    """Predict a coordinate pair for ``text`` with the coordinates model.

    The text is tokenized to exactly 128 tokens (padded or truncated) and run
    through ``coordinates_model`` without gradient tracking, since this is
    inference only.

    Args:
        text: Tweet text to geocode.

    Returns:
        A tuple of the first two model output values for the single input,
        each rounded to 3 decimal places. The caller unpacks them as
        ``(lat, lon)``.
    """
    encoding = tokenizer(
        text,
        padding="max_length",
        truncation=True,
        max_length=128,
        return_tensors='pt',
    )
    # No autograd graph is needed for a forward-only prediction.
    with torch.no_grad():
        outputs = coordinates_model(**encoding)
    # outputs[0] is the model's first output; [0] selects the only batch item.
    values = outputs[0][0]
    return round(values[0].item(), 3), round(values[1].item(), 3)

def reverse_geocode(lat, lon):
    """Look up a human-readable place name for a coordinate pair.

    Sends a GET request to ``https://geocode.maps.co/reverse`` and reads the
    ``display_name`` field of the JSON response. This is best-effort: any
    failure (network error, timeout, HTTP error status, malformed or
    unexpected JSON) yields a fallback message instead of raising.

    Args:
        lat: Latitude sent as the ``lat`` query parameter.
        lon: Longitude sent as the ``lon`` query parameter.

    Returns:
        A message string: either the reverse-geocoded display name or a
        notice that the service could not resolve the coordinates.
    """
    payload = {
        'lat'             : lat,
        'lon'             : lon,
        'zoom'            : 12,
        'format'          : 'jsonv2',
        'accept-language' : 'en'
    }
    try:
        # A timeout keeps a stalled service from hanging the UI callback.
        r = requests.get(
            'https://geocode.maps.co/reverse', params=payload, timeout=10
        )
        # Send HTTP error statuses down the fallback path explicitly.
        r.raise_for_status()
        return f"Reverse geocoded coordinates: {r.json()['display_name']}"
    except Exception:
        # Deliberately broad (but not bare, so KeyboardInterrupt/SystemExit
        # still propagate): the lookup is optional.
        return "Service couldn't reverse geocode provided coordinates."

def predict(text):
    """Run the full geocoding flow for one tweet.

    The tweet is cleaned with ``process_tweet`` and classified with
    ``predict_relevancy``. Only when the label is ``'relevant'`` are the
    coordinates predicted and reverse geocoded; otherwise the row keeps its
    zero/empty defaults.

    Args:
        text: Raw tweet text.

    Returns:
        A single-row ``pandas.DataFrame`` with the columns
        ``relevancy_score``, ``lat``, ``lon`` and ``reversed lat/lon``.
    """
    cleaned = process_tweet(text)
    label, score = predict_relevancy(cleaned)

    # Defaults reported for tweets that are not labelled relevant.
    row = {"relevancy_score": 0, "lat": 0, "lon": 0, "reversed lat/lon": ""}

    if label == 'relevant':
        lat, lon = predict_coordinates(cleaned)
        row.update({
            "relevancy_score": score,
            "lat": lat,
            "lon": lon,
            "reversed lat/lon": reverse_geocode(lat, lon),
        })

    return pd.DataFrame([row])

# Assemble the Gradio UI: a centered title, a textbox for the tweet, and a
# dataframe component that receives the result of predict().
with gr.Blocks() as demo:

    gr.Markdown("# **<p align='center'>Twitter geocoding with 🤗 Transformers</p>**")
    inputs = gr.Textbox(placeholder="Enter the tweet")
    # Single-element list of output components handed to the submit event below.
    outputs = [gr.Dataframe(label="Geocoded data")]
    # Wire the textbox's submit event to predict().
    inputs.submit(predict, inputs=inputs, outputs=outputs)

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()